def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """Starting 'article' on an invalid record raises and leaves the object in ERROR.

    The error message stored in ``extra_data`` is expected to mention
    'required'.
    """
    record_missing_fields = {
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    created = workflow_object_class.create(
        data=record_missing_fields,
        data_type='hep',
        id_user=1,
    )
    created_id = created.id

    with pytest.raises(ValidationError):
        start('article', record_missing_fields, created_id)

    # Reload the object to look at the state that was persisted.
    reloaded = workflow_object_class.get(created_id)

    assert reloaded.status == ObjectStatus.ERROR
    stored_extra_data = reloaded.extra_data
    assert '_error_msg' in stored_extra_data
    assert 'required' in stored_extra_data['_error_msg']
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A holdingpen match with a different source is recorded but not stopped."""
    record = generate_record()

    pending_engine_uuid = start('article', object_id=build_workflow(record).id)
    es.indices.refresh('holdingpen-hep')
    pending_wf_id = WorkflowEngine.from_uuid(pending_engine_uuid).objects[0].id
    pending_wf = workflow_object_class.get(pending_wf_id)
    # The first workflow is now waiting in the holdingpen.
    assert pending_wf.status == ObjectStatus.HALTED

    # Resubmit the record with another title and another source: it matches
    # the pending workflow, but is expected to go on because the source differs.
    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    record['acquisition_source']['source'] = 'but not the source'
    update_engine_uuid = start('article', object_id=build_workflow(record).id)
    update_wf = WorkflowEngine.from_uuid(update_engine_uuid).objects[0]
    update_extra_data = update_wf.extra_data

    assert update_extra_data['already-in-holding-pen'] is True
    assert update_extra_data['holdingpen_matches'] == [pending_wf_id]
    assert update_extra_data['previously_rejected'] is False
    assert not update_extra_data.get('stopped-matched-holdingpen-wf')
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """The restart-count marks are 0 after the first start and 1 after the second."""
    record = generate_record()

    with workflow_app.app_context():
        wf_id = build_workflow(record).id
        start('article', object_id=wf_id)

        wf = workflow_object_class.get(wf_id)

        persistent_marks = wf.extra_data['source_data']['persistent_data']['marks']
        assert persistent_marks['restart-count'] == 0
        assert wf.extra_data['restart-count'] == 0

        # Reset the callback position and persist it before starting again.
        wf.callback_pos = [0]
        wf.save()
        db.session.commit()

        start('article', object_id=wf_id)

        # Read through extra_data again instead of reusing the earlier lookup.
        persistent_marks = wf.extra_data['source_data']['persistent_data']['marks']
        assert persistent_marks['restart-count'] == 1
        assert wf.extra_data['restart-count'] == 1
def test_validation_error_callback_with_validation_error(workflow_app):
    """The validation callback answers 400 / VALIDATION_ERROR for a record that fails validation."""
    record_with_bad_date = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
        "preprint_date": "Jessica Jones",
    }

    workflow_id = build_workflow(record_with_bad_date).id

    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    failed_wf = workflow_object_class.get(workflow_id)
    assert failed_wf.status == ObjectStatus.ERROR

    # Send the unchanged data and extra_data back through the callback.
    response = do_validation_callback(
        workflow_app, failed_wf.id, failed_wf.data, failed_wf.extra_data
    )
    payload = json.loads(response.get_data())

    assert response.status_code == 400
    assert "VALIDATION_ERROR" == payload["error_code"]
    assert "Validation error." == payload["message"]

    returned_extra_data = payload["workflow"]["_extra_data"]
    assert returned_extra_data["callback_url"]
    assert len(returned_extra_data["validation_errors"]) == 1
def test_workflows_halts_on_multiple_exact_matches(workflow_app):
    """An update matching two existing records exactly halts for manual resolution."""
    # Record from arxiv with just arxiv ID in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_arxiv.json", index_name="records-hep"
    )

    # Record from publisher with just DOI in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_publisher.json", index_name="records-hep"
    )

    path = pkg_resources.resource_filename(
        __name__, "fixtures/multiple_matches_arxiv_update.json"
    )
    # Use a context manager so the fixture file handle is always closed.
    with open(path) as fixture_file:
        update_from_arxiv = json.load(fixture_file)

    # An update from arxiv with the same arxiv and DOI as above records
    workflow_id = build_workflow(update_from_arxiv).id
    start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)

    assert len(set(obj.extra_data["matches"]["exact"])) == 2

    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data["_action"] == "resolve_multiple_exact_matches"
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """An update matching a previously rejected workflow is flagged as previously rejected."""
    record = generate_record()

    first_engine = WorkflowEngine.from_uuid(start('article', [record]))
    rejected_id = first_engine.objects[0].id

    rejected_wf = workflow_object_class.get(rejected_id)
    rejected_wf.extra_data["approved"] = False  # reject record
    rejected_wf.continue_workflow()

    rejected_wf = workflow_object_class.get(rejected_id)
    assert rejected_wf.status == ObjectStatus.COMPLETED
    assert rejected_wf.extra_data.get('approved') is False

    es.indices.refresh('holdingpen-hep')

    # Same record with a new title: it matches the rejected workflow above.
    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    second_engine = WorkflowEngine.from_uuid(start('article', [record]))
    update_extra_data = second_engine.objects[0].extra_data

    assert update_extra_data['already-in-holding-pen'] is False
    assert update_extra_data['previously_rejected'] is True
    assert update_extra_data['previously_rejected_matches'] == [rejected_id]
def test_halt(app, halt_workflow, halt_workflow_conditional):
    """Check that both halting workflows wait, and complete once continued."""
    registered_workflows = app.extensions['invenio-workflows'].workflows
    assert 'halttest' in registered_workflows
    assert 'halttestcond' in registered_workflows

    def run_until_halt_then_resume(workflow_name, payload):
        # Start the workflow, check that it halted, then continue it to the end.
        engine = WorkflowEngine.from_uuid(start(workflow_name, payload))
        waiting = engine.processed_objects[0]

        assert waiting.known_statuses.WAITING == waiting.status
        assert WorkflowStatus.HALTED == engine.status

        waiting_id = waiting.id
        waiting.continue_workflow()

        resumed = WorkflowObject.get(waiting_id)
        assert resumed.known_statuses.COMPLETED == resumed.status

    with app.app_context():
        data = [{'foo': 'bar'}]

        run_until_halt_then_resume('halttest', data)

        # Conditional workflow, with the data passed as a bare item instead
        # of a list (to check that form too).
        run_until_halt_then_resume('halttestcond', data[0])
def test_task_info(app, halt_workflow):
    """Check that the current task reported after running 'halttest' is 'halt_engine'."""
    with app.app_context():
        created = WorkflowObject.create({"x": 22})
        start("halttest", created)

        # Reload the object by id before asking for its task info.
        reloaded = WorkflowObject.get(created.id)
        current_task = reloaded.get_current_task_info()
        assert current_task["name"] == "halt_engine"
def test_start_wf_with_no_source_data_fails(workflow_app):
    """Starting 'article' on an object without ``source_data`` raises ValueError."""
    wf = build_workflow(generate_record())

    # Remove the source data and persist the change before starting.
    del wf.extra_data["source_data"]
    wf.save()
    db.session.commit()

    with pytest.raises(ValueError):
        start("article", object_id=wf.id)
def test_match_wf_in_error_goes_in_initial_state(workflow_app):
    """A new workflow for a record that already has an INITIAL object raises WorkflowsError."""
    record = generate_record()

    existing = workflow_object_class.create(data=record, data_type="hep")
    existing.status = ObjectStatus.INITIAL
    existing.save()
    es.indices.refresh("holdingpen-hep")

    # Building and starting both stay inside the expectation, as before.
    with pytest.raises(WorkflowsError):
        start("article", object_id=build_workflow(record).id)
Esempio n. 11
0
def start_merger(head_id, update_id, current_user_id=None):
    """Create and start a ManualMerge workflow for two literature records.

    Args:
        head_id: the id of the first record to merge. This is the record
            that will be updated with the new information.
        update_id: the id of the second record to merge. This is the record
            that is going to be deleted and replaced by `head`.
        current_user_id: Id of the current user provided by the Flask app.

    Returns:
        (int): the id of the workflow object that was created.
    """
    head_record = get_db_record('lit', head_id)
    update_record = get_db_record('lit', update_id)

    merge_wf = workflow_object_class.create(
        data=None,
        id_user=current_user_id,
        data_type='hep',
    )
    # Kept aside: used to start the workflow and returned to the caller.
    merge_wf_id = merge_wf.id

    # Fall back to 'arxiv' when the update record has no (truthy) source.
    update_source = LiteratureReader(update_record).source or 'arxiv'

    merge_wf.extra_data.update({
        'pid_type': 'lit',  # TODO: support
        'recid_head': head_id,
        'recid_update': update_id,
        'update_source': update_source.lower(),
        'head_control_number': head_id,
        'update_control_number': update_id,
        'head_uuid': str(head_record.id),
        'update_uuid': str(update_record.id),
        'head': head_record,
        'update': update_record,
    })
    merge_wf.save()

    start('manual_merge', object_id=merge_wf_id)

    return merge_wf_id
def test_errors(app, error_workflow):
    """Check the exceptions raised by ``start`` and the state left by a failing run."""
    assert 'errortest' in app.extensions['invenio-workflows'].workflows

    with app.app_context():
        # Neither data nor an object id is given.
        with pytest.raises(WorkflowsMissingData):
            start('errortest')

        # The workflow name is not registered.
        with pytest.raises(WorkflowDefinitionError):
            start('doesnotexist', 100)

        # The object id does not resolve to an object.
        with pytest.raises(WorkflowsMissingObject):
            start('errortest', object_id=-1)

        created = WorkflowObject.create({"id": 0})
        db.session.commit()
        created_id = created.id

        with pytest.raises(ZeroDivisionError):
            start('errortest', object_id=created_id)

        failed = WorkflowObject.get(created_id)

        assert failed.known_statuses.ERROR == failed.status
        assert failed.data == {"id": 0, "foo": "bar"}
Esempio n. 13
0
def test_merge_with_disabled_merge_on_update_feature_flag(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
):
    """With FEATURE_FLAG_ENABLE_MERGER off, an update completes without conflicts or a stored root."""
    merger_disabled = {'FEATURE_FLAG_ENABLE_MERGER': False}

    with patch.dict(workflow_app.config, merger_disabled):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(RECORD_WITHOUT_CONFLICTS).id
        engine = WorkflowEngine.from_uuid(
            start('article', object_id=update_workflow_id)
        )
        update_wf = engine.objects[0]

        assert update_wf.status == ObjectStatus.COMPLETED

        update_extra_data = update_wf.extra_data
        assert update_extra_data.get('callback_url') is None
        assert update_extra_data.get('conflicts') is None
        assert update_extra_data.get('merged') is True
        assert update_extra_data.get('merger_root') is None
        assert update_extra_data.get('is-update') is True

        # No root is expected to have been written for the 'arxiv' source.
        stored_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert stored_root is None
def test_workflow_loads_from_source_data_fails_on_no_source_data(
    load_from_source_data_workflow,
    workflow_app,
    record_from_db,
):
    """'load_source_data' raises ValueError when ``extra_data`` has no source data."""
    wf_without_source_data = workflow_object_class.create(
        data_type='hep',
        data=record_from_db,
        extra_data={},
    )
    workflow_id = wf_without_source_data.id

    with pytest.raises(ValueError) as exc:
        start('load_source_data', object_id=workflow_id)

    assert exc.match(r'source_data.*missing')
Esempio n. 15
0
def test_merge_without_conflicts_handles_update_without_acquisition_source_and_acts_as_rootless(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
        enable_merge_on_update,
):
    """An update without acquisition source merges cleanly and stores no root."""
    conflict_filters_path = 'inspire_json_merger.config.PublisherOnArxivOperations.conflict_filters'

    with patch(conflict_filters_path, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(RECORD_WITHOUT_ACQUISITION_SOURCE_AND_NO_CONFLICTS).id

        engine = WorkflowEngine.from_uuid(
            start('article', object_id=update_workflow_id)
        )
        update_wf = engine.objects[0]
        update_extra_data = update_wf.extra_data

        found_conflicts = update_extra_data.get('conflicts')

        assert update_wf.status == ObjectStatus.COMPLETED
        assert not found_conflicts

        assert update_extra_data.get('callback_url') is None
        assert update_extra_data.get('is-update') is True
        assert update_extra_data['merger_head_revision'] == 0
        assert update_extra_data['merger_original_root'] == {}

        # The source is unknown, so no new root is saved.
        stored_roots = read_all_wf_record_sources(factory.record_metadata.id)
        assert not stored_roots
Esempio n. 16
0
def test_harvesting_arxiv_workflow_already_on_legacy(
    mocked_refextract_extract_refs,
    mocked_api_request_beard_block,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download,
    small_app,
    already_harvested_on_legacy_record,
):
    """A record already harvested on legacy completes and is marked 'already-ingested'."""
    service_urls = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    with small_app.app_context():
        # The config override only covers the start() call.
        with mock.patch.dict(small_app.config, service_urls):
            workflow_uuid = start(
                'article', [already_harvested_on_legacy_record]
            )

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        processed = engine.processed_objects[0]

        assert processed.status == ObjectStatus.COMPLETED
        assert 'already-ingested' in processed.extra_data
        assert processed.extra_data['already-ingested']
Esempio n. 17
0
def test_merge_without_conflicts_rootful(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
        enable_merge_on_update,
):
    """With an existing arXiv root, the update merges without conflicts and replaces the root."""
    conflict_filters_path = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'

    with patch(conflict_filters_path, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        head_uuid = factory.record_metadata.id

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id

        # Store a root for the 'arxiv' source before the update workflow runs.
        insert_wf_record_source(json=ARXIV_ROOT, record_uuid=head_uuid, source='arxiv')

        engine = WorkflowEngine.from_uuid(
            start('article', object_id=update_workflow_id)
        )
        update_wf = engine.objects[0]
        update_extra_data = update_wf.extra_data

        found_conflicts = update_extra_data.get('conflicts')

        assert update_wf.status == ObjectStatus.COMPLETED
        assert not found_conflicts

        assert update_extra_data.get('callback_url') is None
        assert update_extra_data.get('is-update') is True
        assert update_extra_data['merger_head_revision'] == 0
        assert update_extra_data['merger_original_root'] == ARXIV_ROOT

        stored_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert stored_root.json == RECORD_WITH_CONFLICTS
Esempio n. 18
0
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """A halted workflow matched in the holdingpen is completed and tagged with the stopper's id."""
    # A workflow must be run on the object first so that it gets a workflow
    # definition and a uuid.
    seed = workflow_object_class.create(data_type='hep', **simple_record)
    engine = WorkflowEngine.from_uuid(start('article', object_id=seed.id))

    halted_wf = engine.processed_objects[0]
    halted_wf.status = ObjectStatus.HALTED
    halted_wf.save()
    halted_wf_id = halted_wf.id
    es.indices.refresh('holdingpen-hep')

    incoming_wf = WorkflowObject.create(data_type='hep', **simple_record)
    incoming_wf_id = incoming_wf.id

    match_non_completed_wf_in_holdingpen(incoming_wf, None)
    assert incoming_wf.extra_data['holdingpen_matches'] == [halted_wf_id]

    stop_matched_holdingpen_wfs(incoming_wf, None)

    stopped_wf = workflow_object_class.get(halted_wf_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == incoming_wf_id
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """A record matching an earlier rejected workflow completes without auto-approval."""
    record, categories = core_record()
    record["arxiv_eprints"][0]["categories"] = ["q-bio.GN"]

    # Seed the holdingpen with a completed and rejected workflow for this record.
    rejected_wf = workflow_object_class.create(
        data=record, status=ObjectStatus.COMPLETED, data_type="hep"
    )
    rejected_wf.extra_data["approved"] = False  # reject it
    rejected_wf.save()
    es.indices.refresh("holdingpen-hep")

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }
    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, config_overrides):
            new_wf_id = build_workflow(record).id
            engine = WorkflowEngine.from_uuid(
                start("article", object_id=new_wf_id)
            )
            new_wf = engine.processed_objects[0]

            assert not new_wf.extra_data["auto-approved"]
            assert len(new_wf.extra_data["previously_rejected_matches"]) > 0
            assert new_wf.status == ObjectStatus.COMPLETED
def test_validation_error_callback_with_malformed_with_invalid_types(workflow_app):
    """The validation callback answers 400 / MALFORMED when id and extra_data have wrong types."""
    record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    engine_uuid = start("article", object_id=build_workflow(record).id)
    wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

    # Strings are sent where the workflow id and the extra_data should be.
    bogus_id = "Alias Investigations"
    bogus_extra_data = "Jessica Jones"
    response = do_validation_callback(
        workflow_app, bogus_id, wf.data, bogus_extra_data
    )
    payload = json.loads(response.get_data())

    assert response.status_code == 400
    assert "MALFORMED" == payload["error_code"]
    assert "The workflow request is malformed." == payload["message"]
    assert "errors" in payload
Esempio n. 21
0
def test_harvesting_arxiv_workflow_accepted(
    mocked, db_only_app, record_oai_arxiv_plots):
    """Run 'article' on a converted arXiv record, approve it while halted, and see it complete."""
    from invenio_workflows import (
        start, WorkflowEngine, ObjectStatus, workflow_object_class
    )
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # OAI arXiv XML -> MARCXML -> dict -> HEP JSON
    marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl")
    hep_json = hep.do(create_record(marcxml))

    with db_only_app.app_context():
        workflow_uuid = start('article', [hep_json])

        halted = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert halted.status == ObjectStatus.HALTED
        assert halted.data_type == "hep"

        # Files should have been attached (tarball + pdf)
        assert halted.files["1407.7587.pdf"]
        assert halted.files["1407.7587.tar.gz"]

        # A publication note should have been extracted
        publication_info = halted.data.get('publication_info')
        assert publication_info
        first_publication = publication_info[0]
        assert first_publication
        assert first_publication.get('year') == "2014"
        assert first_publication.get('journal_title') == "J. Math. Phys."

        # This record should not have been touched yet
        assert "approved" not in halted.extra_data

        # Resolve it as accepted before continuing.
        # FIXME Should be accept, but record validation prevents us.
        halted.remove_action()
        halted.extra_data["approved"] = True
        halted.extra_data["core"] = True
        halted.save()

        db.session.commit()

    with db_only_app.app_context():
        to_resume = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
        resumed_id = to_resume.id
        to_resume.continue_workflow()

        # It was accepted
        accepted = workflow_object_class.get(resumed_id)
        assert accepted.status == ObjectStatus.COMPLETED
Esempio n. 22
0
def test_merge_with_conflicts_rootful(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
        enable_merge_on_update,
):
    """Without a stored root, a conflicting update halts with one conflict and a callback URL."""
    conflict_filters_path = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'

    with patch(conflict_filters_path, ['acquisition_source.source']):
        TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id

        # No root is inserted here: by default the root is {}.
        engine = WorkflowEngine.from_uuid(
            start('article', object_id=update_workflow_id)
        )
        update_wf = engine.objects[0]
        update_extra_data = update_wf.extra_data

        found_conflicts = update_extra_data.get('conflicts')
        assert update_wf.status == ObjectStatus.HALTED
        assert len(found_conflicts) == 1

        assert update_extra_data.get('callback_url') is not None
        assert update_extra_data.get('is-update') is True
        assert update_extra_data['merger_root'] == RECORD_WITH_CONFLICTS
        assert update_extra_data['merger_head_revision'] == 0
        assert update_extra_data['merger_original_root'] == {}
Esempio n. 23
0
def start_edit_article_workflow(recid):
    """Start an 'edit_article' workflow for a literature record and redirect to the editor.

    Args:
        recid: id passed to ``get_db_record('lit', ...)`` to load the record.

    Returns:
        A 302 redirect to ``WORKFLOWS_EDITOR_API_URL`` followed by the id of
        the new workflow object.

    Raises:
        CallbackRecordNotFoundError: if the record cannot be loaded.

    The request is aborted with 403 when the current user lacks the
    'update' permission on the record.
    """
    try:
        record = get_db_record('lit', recid)
    except RecordGetterError:
        raise CallbackRecordNotFoundError(recid)

    record_permission = RecordPermission.create(action='update', record=record)
    if not record_permission.can():
        abort(403, record_permission)

    # Must be read before start(): per the original note, start() detaches
    # the current session.
    user_id = current_user.get_id()
    eng_uuid = start('edit_article', data=record)
    workflow_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
    workflow = workflow_object_class.get(workflow_id)
    workflow.id_user = user_id

    if request.referrer:
        # Escape the whole RT base URL, not only '?', so that '.' and other
        # regex metacharacters in it are matched literally. The raw string
        # avoids the invalid '\d' escape of a plain string literal.
        ticket_pattern = re.escape(get_rt_link_for_ticket('')) + r'(?P<ticket_id>\d+)'
        ticket_match = re.match(ticket_pattern, request.referrer)
        if ticket_match:
            ticket_id = int(ticket_match.group('ticket_id'))
            workflow.extra_data['curation_ticket_id'] = ticket_id

    workflow.save()
    db.session.commit()
    url = "{}{}".format(current_app.config['WORKFLOWS_EDITOR_API_URL'], workflow_id)
    return redirect(location=url, code=302)
def test_harvesting_arxiv_workflow_core_record_auto_accepted(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """A core record is approved and auto-approved, and is flagged as core."""
    record, categories = core_record()

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }
    with workflow_app.app_context():
        workflow_id = build_workflow(record).id
        # The config override only covers the start() call.
        with mock.patch.dict(workflow_app.config, config_overrides):
            workflow_uuid = start("article", object_id=workflow_id)

        processed = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert processed.extra_data["approved"] is True
        assert processed.extra_data["auto-approved"] is True
        assert processed.data["core"] is True
Esempio n. 25
0
def edit_workflow(workflow_app):
    """Log a user in, start 'edit_article' on a new record and return its workflow object.

    The returned object is checked to be WAITING and to carry a callback URL.
    """
    client = workflow_app.test_client()
    login_user_via_session(
        client,
        user=User.query.filter_by(email='*****@*****.**').one(),
    )

    schema_url = 'http://localhost:5000/schemas/records/hep.json'
    literature_record = {
        '$schema': schema_url,
        'arxiv_eprints': [
            {'categories': ['nucl-th'], 'value': '1802.03287'},
        ],
        'control_number': 123,
        'document_type': ['article'],
        'titles': [{'title': 'Resource Pooling in Large-Scale Content Delivery Systems'}],
        'self': {'$ref': schema_url},
        '_collections': ['Literature'],
    }
    factory = TestRecordMetadata.create_from_kwargs(json=literature_record)

    engine_uuid = start('edit_article', data=factory.record_metadata.json)
    waiting_wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

    assert waiting_wf.status == ObjectStatus.WAITING
    assert waiting_wf.extra_data['callback_url']
    return waiting_wf
Esempio n. 26
0
def test_merge_callback_url_with_malformed_workflow(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
        enable_merge_on_update,
):
    """A malformed payload sent to the merge callback gets a 400 and leaves the workflow halted."""
    conflict_filters_path = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'

    with patch(conflict_filters_path, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id

        eng_uuid = start('article', object_id=update_workflow_id)

        halted_wf = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        found_conflicts = halted_wf.extra_data.get('conflicts')

        assert halted_wf.status == ObjectStatus.HALTED
        assert (
            'http://localhost:5000/callback/workflows/resolve_merge_conflicts'
            == halted_wf.extra_data.get('callback_url')
        )
        assert len(found_conflicts) == 1

        assert halted_wf.extra_data.get('is-update') is True
        assert halted_wf.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # Strings are sent where the metadata and the extra data should be.
        malformed_payload = {
            'id': halted_wf.id,
            'metadata': 'Jessica Jones',
            '_extra_data': 'Frank Castle'
        }

        with workflow_app.test_client() as client:
            response = client.put(
                halted_wf.extra_data.get('callback_url'),
                data=json.dumps(malformed_payload),
                content_type='application/json',
            )

        response_body = json.loads(response.get_data())

        assert response.status_code == 400
        assert 'The workflow request is malformed.' == response_body['message']

        # Load the workflow again: it is expected to be untouched.
        still_halted = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        assert still_halted.status == ObjectStatus.HALTED
        assert still_halted.extra_data.get('callback_url') is not None
        assert still_halted.extra_data.get('conflicts') is not None
        assert still_halted.extra_data['merger_root'] is not None

        stored_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert stored_root is None
def test_equality(app, halt_workflow):
    """Check ``==`` and ``!=`` between WorkflowObject instances."""
    with app.app_context():
        first = WorkflowObject.create({"x": 22})
        second = WorkflowObject.create({"x": 22})
        start("halttest", [first, second])

        first_id, second_id = first.id, second.id

        # Two objects with the same data, reloaded after the same workflow run.
        reloaded_first = WorkflowObject.get(first_id)
        reloaded_second = WorkflowObject.get(second_id)
        assert reloaded_first == reloaded_second

        # Two new objects that differ in their data.
        with_same_data = WorkflowObject.create({"x": 22})
        with_other_data = WorkflowObject.create({"x": 2})
        assert with_other_data != with_same_data
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """An update matching a pending workflow completes the old one and halts itself."""
    record = generate_record()

    first_engine_uuid = start("article", object_id=build_workflow(record).id)
    es.indices.refresh("holdingpen-hep")
    pending_wf = WorkflowEngine.from_uuid(first_engine_uuid).objects[0]
    pending_wf_id = pending_wf.id

    assert pending_wf.status == ObjectStatus.HALTED
    assert pending_wf.extra_data["previously_rejected"] is False

    # The same record object is resubmitted with only its title changed.
    record["titles"][0]["title"] = "This is an update that will match the wf in the holdingpen"
    update_wf_id = build_workflow(record).id
    start("article", object_id=update_wf_id)
    es.indices.refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(update_wf_id)
    update_extra_data = update_wf.extra_data

    assert update_wf.status == ObjectStatus.HALTED
    assert update_extra_data["previously_rejected"] is False
    assert update_extra_data['already-in-holding-pen'] is True
    assert update_extra_data["stopped-matched-holdingpen-wf"] is True
    assert update_extra_data["is-update"] is False

    # The workflow that was pending is the one that got stopped.
    stopped_wf = workflow_object_class.get(pending_wf_id)
    stopped_extra_data = stopped_wf.extra_data
    assert stopped_extra_data['already-in-holding-pen'] is False
    assert stopped_extra_data['previously_rejected'] is False
    assert stopped_extra_data['stopped-by-wf'] == update_wf.id
    assert stopped_extra_data.get('approved') is None
    assert stopped_extra_data['is-update'] is False
    assert stopped_wf.status == ObjectStatus.COMPLETED
def test_authors_workflow_stops_when_record_is_not_valid(workflow_app):
    """Starting 'author' on an invalid record raises and leaves the object in ERROR.

    The error message stored in ``extra_data`` is expected to mention
    'required'.
    """
    author_with_name_only = {
        'name': {'preferred_name': 'John Smith', 'value': 'Smith, John'},
    }

    workflow_id = build_workflow(author_with_name_only, data_type='authors').id

    with pytest.raises(ValidationError):
        start('author', object_id=workflow_id)

    failed_wf = workflow_object_class.get(workflow_id)

    assert failed_wf.status == ObjectStatus.ERROR
    stored_extra_data = failed_wf.extra_data
    assert '_error_msg' in stored_extra_data
    assert 'required' in stored_extra_data['_error_msg']
def test_refextract_from_pdf(
    mocked_indexing_task,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services
):
    """Run the whole workflow to check refextract from PDF and reference
    matching with the default configuration."""
    cited_title = {
        'source': 'arXiv',
        'title': 'Solving a two-electron quantum dot model in terms of polynomial solutions of a Biconfluent Heun equation'
    }
    cited_eprint = {
        'categories': ['quant-ph', 'cond-mat.mes-hall', 'cond-mat.str-el', 'math-ph', 'math.MP'],
        'value': '1308.0815'
    }
    cited_record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'arxiv_eprints': [cited_eprint],
        'control_number': 1000,
        'document_type': ['article'],
        'titles': [cited_title],
    }

    TestRecordMetadata.create_from_kwargs(
        json=cited_record_json, index='records-hep', pid_type='lit')
    citing_record, categories = insert_citing_record()

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    # Check the acquisition source against its part of the 'hep' schema.
    acquisition_source_schema = load_schema('hep')['properties']['acquisition_source']
    assert validate(citing_record['acquisition_source'], acquisition_source_schema) is None

    with mock.patch.dict(workflow_app.config, config_overrides):
        workflow_id = build_workflow(citing_record).id
        citing_doc_workflow_uuid = start('article', object_id=workflow_id)

    citing_doc_eng = WorkflowEngine.from_uuid(citing_doc_workflow_uuid)
    extracted_references = citing_doc_eng.processed_objects[0].data['references']

    assert extracted_references[7]['record']['$ref'] == 'http://localhost:5000/api/literature/1000'
    assert extracted_references[0]['raw_refs'][0]['source'] == 'arXiv'
Esempio n. 31
0
def test_harvesting_arxiv_workflow_accepted(mocked, small_app,
                                            record_oai_arxiv_plots):
    """Run the article workflow on a harvested arXiv record and accept it.

    The record is expected to halt with its PDF and tarball attached and a
    publication note extracted. It is then flagged as approved and core,
    continued, and must end up completed.
    """
    # OAI arXiv XML -> MARCXML -> MARC record dict -> HEP JSON.
    marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl")
    hep_json = hep.do(create_record(marcxml))

    with small_app.app_context():
        workflow_uuid = start('article', [hep_json])

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        halted = engine.processed_objects[0]

        assert halted.status == ObjectStatus.HALTED
        assert halted.data_type == "hep"

        # Both the PDF and the source tarball must have been attached.
        assert halted.files["1407.7587.pdf"]
        assert halted.files["1407.7587.tar.gz"]

        # A publication note must have been extracted into the metadata.
        publication_info = halted.data.get('publication_info')
        assert publication_info
        first_note = publication_info[0]
        assert first_note
        assert first_note.get('year') == 2014
        assert first_note.get('journal_title') == "J. Math. Phys."

        # No approval decision has been recorded so far.
        assert "approved" not in halted.extra_data

        # Resolve the halted object as accepted before continuing.
        # FIXME Should be accept, but record validation prevents us.
        halted.remove_action()
        halted.extra_data["approved"] = True
        halted.extra_data["core"] = True
        halted.save()

        db.session.commit()

    with small_app.app_context():
        engine = WorkflowEngine.from_uuid(workflow_uuid)
        resumed = engine.processed_objects[0]
        resumed_id = resumed.id
        resumed.continue_workflow()

        # Reload the object to observe its final state: it was accepted.
        final_obj = workflow_object_class.get(resumed_id)
        assert final_obj.status == ObjectStatus.COMPLETED
Esempio n. 32
0
def test_merge_without_conflicts_callback_url(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check the merge callback rejects a workflow that had no conflicts.

    An update sharing the stored record's ``arxiv_eprints`` is run through
    the article workflow and must complete as an update without conflicts,
    with the update saved as the ``arxiv`` root. A PUT to the
    resolve-merge-conflicts callback for that workflow must answer 400.
    """
    with patch(
            'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
            ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        # Work on a copy: updating RECORD_WITHOUT_CONFLICTS in place would
        # leak this test's ``arxiv_eprints`` into every other test that
        # uses the same module-level constant.
        record_update = dict(RECORD_WITHOUT_CONFLICTS)
        record_update['arxiv_eprints'] = factory.record_metadata.json.get(
            'arxiv_eprints')

        eng_uuid = start('article', [record_update])

        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]

        conflicts = obj.extra_data.get('conflicts')

        url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert obj.status == ObjectStatus.COMPLETED
        assert conflicts is None
        assert obj.extra_data.get('is-update') is True

        updated_root = read_wf_record_source(factory.record_metadata.id,
                                             'arxiv')
        assert updated_root.json == record_update

        payload = {
            'id': obj.id,
            'metadata': obj.data,
            '_extra_data': obj.extra_data
        }

        with workflow_app.test_client() as client:
            response = client.put(
                url,
                data=json.dumps(payload),
                content_type='application/json',
            )

        assert response.status_code == 400
Esempio n. 33
0
def test_workflow_checks_affiliations_if_record_is_rejected_by_curator(
    mocked_is_auto_rejected,
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check the collections sent out for a record rejected by a curator.

    The record's authors get specific raw affiliations, the workflow is
    started and then continued with ``approved`` set to False. The JSON of
    the first request captured by ``mocked_external_services`` must list
    "CDS Hidden", "HAL Hidden" and "Fermilab" in ``_collections`` and must
    not list "Literature".
    """
    record = generate_record()
    authors = record['authors']
    authors[0]['raw_affiliations'] = [
        {"value": "IN2P3."},
        {"value": "Some words with CErN, inside."},
    ]
    authors[1]['raw_affiliations'] = [{"value": "Fermilab?"}]
    workflow_id = build_workflow(record).id

    config_overrides = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000",
    }
    with patch.dict(workflow_app.config, config_overrides):
        start("article", object_id=workflow_id)

        # Reject the record and let the workflow run on synchronously.
        rejected_wf = workflow_object_class.get(workflow_id)
        rejected_wf.extra_data['approved'] = False
        rejected_wf.save()
        rejected_wf.continue_workflow(delayed=False)

    first_request = mocked_external_services.request_history[0]
    collections_in_record = first_request.json()['_collections']

    assert "CDS Hidden" in collections_in_record
    assert "HAL Hidden" in collections_in_record
    assert "Fermilab" in collections_in_record
    assert "Literature" not in collections_in_record
Esempio n. 34
0
def test_validation_error_callback_with_validation_error(workflow_app):
    """Check the validation callback reports a record that is still invalid.

    The record carries a ``preprint_date`` that is not a date, so starting
    the workflow raises ``ValidationError`` and leaves the object in ERROR.
    Posting the same data to the validation callback must answer 400 with
    the ``VALIDATION_ERROR`` code, a callback URL and exactly one
    validation error.
    """
    invalid_record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
        'preprint_date': 'Jessica Jones',
    }

    workflow_id = build_workflow(invalid_record).id

    with pytest.raises(ValidationError):
        start('article', object_id=workflow_id)

    failed_obj = workflow_object_class.get(workflow_id)

    assert failed_obj.status == ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app,
        failed_obj.id,
        failed_obj.data,
        failed_obj.extra_data,
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert 'VALIDATION_ERROR' == body['error_code']
    assert 'Validation error.' == body['message']

    returned_extra_data = body['workflow']['_extra_data']
    assert returned_extra_data['callback_url']
    assert len(returned_extra_data['validation_errors']) == 1
Esempio n. 35
0
def test_workflow_with_validation_error(
    fake_validation,
    mocked_match,
    mocked_magpie_json_api_request,
    mocked_beard_json_api_request,
    workflow_app,
    mocked_external_services,
):
    """Check a record with a wrong arXiv category errors out the workflow.

    Starting the workflow must raise ``ValidationError``, the
    ``fake_validation`` mock must have been called twice, and the workflow
    object must be in the ERROR state.
    """
    eprint = {
        "categories": ["WRONG_CATEGORY", "hep-th"],
        "value": "1703.04802",
    }
    record_with_validation_error = {
        "$schema": "https://labs.inspirehep.net/schemas/records/hep.json",
        "titles": [{"title": "Update without conflicts title."}],
        "arxiv_eprints": [eprint],
        "document_type": ["article"],
        "_collections": ["Literature"],
        "acquisition_source": {"source": "arXiv"},
    }
    workflow = build_workflow(record_with_validation_error)

    with pytest.raises(ValidationError):
        start("article", object_id=workflow.id)

    assert fake_validation.call_count == 2
    assert workflow.status == ObjectStatus.ERROR
Esempio n. 36
0
def test_update_record_goes_through_api_version_of_store_record_connection_timeout(
    mocked_request_in_upload,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """Check a connection failure while storing the record is reported.

    With REST record management enabled and ``INSPIREHEP_URL`` pointing at
    an unusable address, starting the workflow must raise a requests
    ``ConnectionError``. The workflow object must be left in ERROR with an
    error message ending in ``ConnectTimeout``.
    """
    workflow_id = build_workflow(record_from_db).id

    broken_endpoint_config = {
        "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
        "INSPIREHEP_URL": "http://go_to_wrong_address.bad__:98765",
    }
    with mock.patch.dict(workflow_app.config, broken_endpoint_config):
        with pytest.raises(requests.exceptions.ConnectionError):
            start("article", object_id=workflow_id)

    failed_obj = workflow_object_class.get(workflow_id)

    assert failed_obj.status == ObjectStatus.ERROR
    error_message = failed_obj.extra_data['_error_msg']
    assert error_message.endswith("\nConnectTimeout\n") is True
Esempio n. 37
0
def test_merge_callback_url_with_malformed_workflow(workflow_app,
                                                    enable_merge_on_update,
                                                    disable_file_upload):
    """Check the merge callback rejects a malformed payload.

    An update that conflicts with the stored record halts the workflow and
    exposes a callback URL. A PUT whose ``metadata`` and ``_extra_data``
    are plain strings must answer 400 with the "malformed" message, and the
    workflow must stay halted with its callback URL and conflicts intact.
    """
    factory = TestRecordMetadata.create_from_file(__name__,
                                                  'record_for_merging.json')

    # Work on a copy: updating RECORD_WITH_CONFLICTS in place would leak
    # this test's ``dois`` into every other test sharing the constant.
    record_update = dict(RECORD_WITH_CONFLICTS)
    record_update['dois'] = factory.record_metadata.json.get('dois')

    eng_uuid = start('article', [record_update])

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    conflicts = obj.extra_data.get('conflicts')

    expected_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

    assert obj.status == ObjectStatus.HALTED
    assert expected_url == obj.extra_data.get('callback_url')
    assert len(conflicts) == 1
    assert obj.extra_data.get('is-update') is True

    # Deliberately wrong types for ``metadata`` and ``_extra_data``.
    payload = {
        'id': obj.id,
        'metadata': 'Jessica Jones',
        '_extra_data': 'Frank Castle'
    }

    with workflow_app.test_client() as client:
        response = client.put(
            obj.extra_data.get('callback_url'),
            data=json.dumps(payload),
            content_type='application/json',
        )

    data = json.loads(response.get_data())
    expected_message = 'The workflow request is malformed.'

    assert response.status_code == 400
    assert expected_message == data['message']

    # Reload the workflow: the rejected request must not have changed it.
    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data.get('callback_url') is not None
    assert obj.extra_data.get('conflicts') is not None
Esempio n. 38
0
def test_authors_workflow_stops_when_record_is_not_valid(workflow_app):
    """Check the author workflow errors out on a record that fails validation.

    Starting the workflow must raise ``ValidationError`` and leave the
    object in ERROR with an ``_error_msg`` that mentions "required".
    """
    invalid_record = {
        'name': {
            'preferred_name': 'John Smith',
            'value': 'Smith, John',
        },
    }

    created = workflow_object_class.create(
        data=invalid_record,
        data_type='authors',
        id_user=1,
    )
    obj_id = created.id

    with pytest.raises(ValidationError):
        start('author', invalid_record, obj_id)

    failed_obj = workflow_object_class.get(obj_id)
    extra_data = failed_obj.extra_data

    assert failed_obj.status == ObjectStatus.ERROR
    assert '_error_msg' in extra_data
    assert 'required' in extra_data['_error_msg']
Esempio n. 39
0
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check an update matching a rejected workflow is flagged as such.

    A first workflow is run and rejected (``approved`` set to False). A
    second workflow for the same record with a new title must then carry
    ``previously_rejected`` and list the first workflow's id in
    ``previously_rejected_matches``.
    """
    record = generate_record()

    first_workflow_id = build_workflow(record).id
    first_eng_uuid = start("article", object_id=first_workflow_id)
    rejected_id = WorkflowEngine.from_uuid(first_eng_uuid).objects[0].id

    rejected = workflow_object_class.get(rejected_id)
    rejected.extra_data["approved"] = False  # reject record
    rejected.continue_workflow()

    rejected = workflow_object_class.get(rejected_id)
    assert rejected.status == ObjectStatus.COMPLETED
    assert rejected.extra_data.get("approved") is False

    current_search.flush_and_refresh("holdingpen-hep")

    first_title = record["titles"][0]
    first_title["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )
    # This second workflow matches in the holdingpen and stops because the
    # matched one was rejected.
    second_workflow_id = build_workflow(record).id
    second_eng_uuid = start("article", object_id=second_workflow_id)
    matching_obj = WorkflowEngine.from_uuid(second_eng_uuid).objects[0]

    assert matching_obj.extra_data["previously_rejected"] is True
    assert matching_obj.extra_data["previously_rejected_matches"] == [rejected_id]
Esempio n. 40
0
def test_restart(app, restart_workflow):
    """Exercise restarting and resuming the ``restarttest`` workflow.

    Starts the workflow with empty data and expects it to halt with the
    "foo" action, restarts the engine while it is halted, removes the
    action and resumes the object to completion, then restarts the
    workflow once more with the completed object as data.
    """
    # The workflow under test must be registered with the extension.
    assert 'restarttest' in app.extensions['invenio-workflows'].workflows

    with app.app_context():
        data = {}

        eng_uuid = start('restarttest', data)

        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.processed_objects[0]

        # First run: both object and engine are halted on the "foo" action.
        assert obj.known_statuses.HALTED == obj.status
        assert WorkflowStatus.HALTED == eng.status
        assert obj.data == {"title": "foo"}
        assert obj.get_action() == "foo"
        assert obj.get_action_message() == "Test"

        # Restart shall have no effect (still halted)
        new_eng_uuid = restart(eng_uuid)

        # The restart reuses the same engine uuid.
        assert new_eng_uuid == eng_uuid

        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.processed_objects[0]

        # NOTE(review): the status is unchanged, but ``data`` differs from
        # the first run ({"title": "foo"}), so the restart did re-run tasks.
        assert obj.known_statuses.HALTED == obj.status
        assert WorkflowStatus.HALTED == eng.status
        assert obj.data == {"title": {"value": "bar"}}
        assert obj.get_action() == "foo"

        obj.remove_action()
        assert obj.get_action() is None

        obj_id = obj.id

        # Now it should resume the next task
        resume(obj_id)

        # Reload the object to observe the state left by ``resume``.
        obj = WorkflowObject.get(obj_id)
        assert obj.known_statuses.COMPLETED == obj.status
        assert obj.extra_data.get('test') == 'test'
        assert obj.data.get('title').get('source') == 'TEST'

        # We restart the object again
        restart(obj.workflow.uuid, data=obj)
        # NOTE(review): ``eng`` below is the engine loaded before ``resume``
        # and is not reloaded here — confirm this assertion checks what is
        # intended.
        assert obj.known_statuses.HALTED == obj.status
        assert WorkflowStatus.HALTED == eng.status
        assert obj.data == {"title": {"value": "bar"}}
Esempio n. 41
0
def test_article_workflow_continues_when_record_is_valid(workflow_app):
    """Check a minimal valid record does not put the workflow in ERROR."""
    workflow_id = build_workflow({
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }).id

    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    processed = engine.objects[0]

    assert processed.status != ObjectStatus.ERROR
    assert "_error_msg" not in processed.extra_data
Esempio n. 42
0
def test_merge_without_conflicts_rootful(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check a merge with a stored root completes without conflicts.

    An ``arxiv`` root is stored for the existing record before the update
    is run. The workflow must complete as an update with no conflicts and
    no callback URL, and the stored ``arxiv`` root must afterwards equal
    the update.
    """
    with patch(
            'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
            ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        stored_eprints = factory.record_metadata.json.get('arxiv_eprints')

        # Work on copies: updating RECORD_WITH_CONFLICTS / ARXIV_ROOT in
        # place would leak this test's ``arxiv_eprints`` into every other
        # test sharing those module-level constants.
        record_update = dict(RECORD_WITH_CONFLICTS)
        record_update['arxiv_eprints'] = stored_eprints

        arxiv_root = dict(ARXIV_ROOT)
        arxiv_root['arxiv_eprints'] = stored_eprints

        insert_wf_record_source(json=arxiv_root,
                                record_uuid=factory.record_metadata.id,
                                source='arxiv')

        eng_uuid = start('article', [record_update])

        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]

        conflicts = obj.extra_data.get('conflicts')

        assert obj.status == ObjectStatus.COMPLETED
        assert not conflicts

        assert obj.extra_data.get('callback_url') is None
        assert obj.extra_data.get('is-update') is True

        updated_root = read_wf_record_source(factory.record_metadata.id,
                                             'arxiv')
        assert updated_root.json == record_update
Esempio n. 43
0
def test_merge_with_disabled_merge_on_update_feature_flag(
        workflow_app, disable_file_upload):
    """Check an update run without the ``enable_merge_on_update`` fixture.

    The workflow must expose neither a callback URL nor conflicts, and
    ``merged`` must be True in its extra data.
    """
    factory = TestRecordMetadata.create_from_file(__name__,
                                                  'record_for_merging.json')
    stored_json = factory.record_metadata.json

    # Work on a copy: updating RECORD_WITHOUT_CONFLICTS in place would leak
    # this test's ``$schema`` and ``dois`` into every other test sharing
    # the module-level constant.
    record_update = dict(RECORD_WITHOUT_CONFLICTS)
    record_update['$schema'] = stored_json.get('$schema')
    record_update['dois'] = stored_json.get('dois')

    eng_uuid = start('article', [record_update])

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    assert obj.extra_data.get('callback_url') is None
    assert obj.extra_data.get('conflicts') is None
    assert obj.extra_data.get('merged') is True
Esempio n. 44
0
def test_merge_with_conflicts(workflow_app, enable_merge_on_update,
                              disable_file_upload):
    """Check a conflicting update halts the workflow with one conflict.

    The halted workflow must expose a callback URL and be flagged as an
    update.
    """
    factory = TestRecordMetadata.create_from_file(__name__,
                                                  'record_for_merging.json')

    # Work on a copy: updating RECORD_WITH_CONFLICTS in place would leak
    # this test's ``dois`` into every other test sharing the constant.
    record_update = dict(RECORD_WITH_CONFLICTS)
    record_update['dois'] = factory.record_metadata.json.get('dois')

    eng_uuid = start('article', [record_update])

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    conflicts = obj.extra_data.get('conflicts')

    assert obj.status == ObjectStatus.HALTED
    assert len(conflicts) == 1
    assert obj.extra_data.get('callback_url') is not None
    assert obj.extra_data.get('is-update') is True
Esempio n. 45
0
def test_update_exact_matched_goes_trough_the_workflow(
        mocked_is_pdf_link, mocked_download_arxiv, mocked_api_request_beard,
        mocked_api_request_magpie, workflow_app, mocked_external_services,
        record_from_db):
    """Check an update exactly matching a stored record runs to completion.

    The workflow must have no holdingpen matches, be flagged as an
    exact-matched, approved update of the record's control number, and
    finish COMPLETED.
    """
    control_number = record_from_db.get('control_number')

    eng_uuid = start('article', [record_from_db])
    engine = WorkflowEngine.from_uuid(eng_uuid)
    reloaded = workflow_object_class.get(engine.objects[0].id)
    extra_data = reloaded.extra_data

    # Nothing in the holdingpen interferes with this workflow.
    assert extra_data['already-in-holding-pen'] is False
    assert extra_data['holdingpen_matches'] == []
    assert extra_data['previously_rejected'] is False
    assert not extra_data.get('stopped-matched-holdingpen-wf')

    # The record is recognised as an exact-matched, approved update.
    assert extra_data['is-update']
    assert extra_data['exact-matched']
    assert extra_data['matches']['exact'] == [control_number]
    assert extra_data['matches']['approved'] == control_number
    assert extra_data['approved']
    assert reloaded.status == ObjectStatus.COMPLETED
Esempio n. 46
0
def test_harvesting_arxiv_workflow_already_on_legacy(
        mocked_refextract_extract_refs, mocked_api_request_magpie,
        mocked_api_request_beard, mocked_download, small_app):
    """Check the workflow for the already-harvested-on-legacy record.

    It must complete with a truthy ``already-ingested`` flag in its extra
    data.
    """
    api_urls = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    with small_app.app_context():
        # Only the workflow run itself happens under the patched config.
        with mock.patch.dict(small_app.config, api_urls):
            record = already_harvested_on_legacy_record()
            workflow_uuid = start('article', [record])

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        processed = engine.processed_objects[0]
        extra_data = processed.extra_data

        assert processed.status == ObjectStatus.COMPLETED
        assert 'already-ingested' in extra_data
        assert extra_data['already-ingested']
def test_article_workflow_continues_when_record_is_valid(workflow_app):
    """Check a minimal valid record does not put the workflow in ERROR."""
    valid_record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    engine = WorkflowEngine.from_uuid(start('article', [valid_record]))
    processed = engine.objects[0]

    assert processed.status != ObjectStatus.ERROR
    assert '_error_msg' not in processed.extra_data
Esempio n. 48
0
def test_validation_error_callback_with_missing_worfklow(workflow_app):
    """Check the validation callback answers 404 for an unknown workflow id.

    A workflow is run to obtain realistic data and extra data, which are
    then posted to the callback under the id 1111.
    """
    record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    workflow_id = build_workflow(record).id
    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    existing = engine.objects[0]

    # 1111 is the id the callback is expected not to find.
    response = do_validation_callback(
        workflow_app, 1111, existing.data, existing.extra_data
    )
    body = json.loads(response.get_data())

    assert response.status_code == 404
    assert "WORKFLOW_NOT_FOUND" == body["error_code"]
    assert 'The workflow with id "1111" was not found.' == body["message"]
Esempio n. 49
0
def test_harvesting_arxiv_workflow_already_on_legacy(
        mocked_download, mocked_is_pdf, mocked_refextract_extract_refs,
        mocked_api_request_magpie, mocked_api_request_beard, workflow_app,
        mocked_external_services):
    """Check the workflow for the already-harvested-on-legacy record.

    The record's categories are configured as already harvested on legacy.
    The workflow must complete with a truthy ``already-ingested`` flag in
    its extra data.
    """
    record, categories = already_harvested_on_legacy_record()

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY': categories,
    }

    with workflow_app.app_context():
        # Only the workflow run itself happens under the patched config.
        with mock.patch.dict(workflow_app.config, config_overrides):
            workflow_uuid = start('article', [record])

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        processed = engine.processed_objects[0]
        extra_data = processed.extra_data

        assert processed.status == ObjectStatus.COMPLETED
        assert 'already-ingested' in extra_data
        assert extra_data['already-ingested']
Esempio n. 50
0
def test_validation_error_callback_with_a_valid(workflow_app):
    """Check the validation callback refuses a workflow that is not in ERROR.

    A valid record is run through the workflow; posting it to the callback
    must answer 400 with the ``WORKFLOW_NOT_IN_ERROR_STATE`` code.
    """
    workflow_id = build_workflow({
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }).id

    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    healthy = engine.objects[0]

    assert healthy.status != ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app, healthy.id, healthy.data, healthy.extra_data
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert "WORKFLOW_NOT_IN_ERROR_STATE" == body["error_code"]
Esempio n. 51
0
def test_harvesting_arxiv_workflow_core_record_auto_accepted(
        mocked_download, mocked_is_pdf, mocked_refextract_extract_refs,
        mocked_api_request_magpie, mocked_api_request_beard, workflow_app,
        mocked_external_services):
    """Check the record from ``core_record()`` is approved automatically.

    After the workflow runs, ``approved`` and ``auto-approved`` must be
    True in the extra data and ``core`` must be True in the record data.
    """
    record, categories = core_record()

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    with workflow_app.app_context():
        # Only the workflow run itself happens under the patched config.
        with mock.patch.dict(workflow_app.config, config_overrides):
            workflow_uuid = start('article', [record])

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        processed = engine.processed_objects[0]

        assert processed.extra_data['approved'] is True
        assert processed.extra_data['auto-approved'] is True
        assert processed.data['core'] is True
Esempio n. 52
0
def test_merge_without_conflicts_callback_url(workflow_app,
                                              enable_merge_on_update,
                                              disable_file_upload):
    """Check the merge callback rejects a workflow that had no conflicts.

    An update sharing the stored record's ``$schema`` and ``dois`` is run
    through the workflow and must be flagged as an update without
    conflicts. A PUT to the resolve-merge-conflicts callback for that
    workflow must answer 400.
    """
    factory = TestRecordMetadata.create_from_file(__name__,
                                                  'record_for_merging.json')
    stored_json = factory.record_metadata.json

    # Work on a copy: updating RECORD_WITHOUT_CONFLICTS in place would leak
    # this test's ``$schema`` and ``dois`` into every other test sharing
    # the module-level constant.
    record_update = dict(RECORD_WITHOUT_CONFLICTS)
    record_update['$schema'] = stored_json.get('$schema')
    record_update['dois'] = stored_json.get('dois')

    eng_uuid = start('article', [record_update])

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    conflicts = obj.extra_data.get('conflicts')

    url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

    assert conflicts is None
    assert obj.extra_data.get('is-update') is True

    payload = {
        'id': obj.id,
        'metadata': obj.data,
        '_extra_data': obj.extra_data
    }

    with workflow_app.test_client() as client:
        response = client.put(
            url,
            data=json.dumps(payload),
            content_type='application/json',
        )

    assert response.status_code == 400
Esempio n. 53
0
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """Check ``stop_matched_holdingpen_wfs`` completes the matched workflow.

    A halted workflow is matched by a second object built from the same
    record. Once stopped it must be COMPLETED and record the second
    object's id under ``stopped-by-wf``.
    """
    # A workflow has to be run so that the object gets a workflow
    # definition and a uuid assigned.
    engine = WorkflowEngine.from_uuid(start('article', [simple_record]))
    halted = engine.processed_objects[0]
    halted.status = ObjectStatus.HALTED
    halted.save()
    halted_id = halted.id

    es.indices.refresh('holdingpen-hep')

    incoming = WorkflowObject.create(data=simple_record, data_type='hep')
    incoming_id = incoming.id

    match_non_completed_wf_in_holdingpen(incoming, None)
    assert incoming.extra_data['holdingpen_matches'] == [halted_id]

    stop_matched_holdingpen_wfs(incoming, None)

    stopped_wf = workflow_object_class.get(halted_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == incoming_id
Esempio n. 54
0
def test_validation_error_callback_with_malformed_with_invalid_types(
        workflow_app):
    """Check the validation callback rejects wrongly typed request fields.

    The workflow id and the extra data are both sent as plain strings; the
    callback must answer 400 with the ``MALFORMED`` code, the "malformed"
    message and an ``errors`` entry.
    """
    record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    workflow_id = build_workflow(record).id
    engine = WorkflowEngine.from_uuid(start('article', object_id=workflow_id))
    processed = engine.objects[0]

    bad_workflow_id = 'Alias Investigations'
    bad_extra_data = 'Jessica Jones'
    response = do_validation_callback(
        workflow_app,
        bad_workflow_id,
        processed.data,
        bad_extra_data,
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert 'MALFORMED' == body['error_code']
    assert 'The workflow request is malformed.' == body['message']
    assert 'errors' in body
Esempio n. 55
0
def test_merge_with_conflicts(workflow_app, enable_merge_on_update, record_to_merge):
    """Check a conflicting update halts the workflow with one conflict.

    The halted workflow must expose a callback URL and be flagged as an
    update.
    """
    titles = [
        {'title': 'Jessica Jones'},
        {'title': 'Luke Cage'},
        {'title': 'Frank Castle'},
    ]
    authors = [
        {'full_name': 'Maldacena, J.'},
        {'full_name': 'Strominger, A.'},
    ]
    record_update = {
        '$schema': 'http://schemas.stark-industries.com/schemas/records/avengers.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': titles,
        'authors': authors,
        'abstracts': [{'source': 'arxiv', 'value': 'A basic abstract.'}],
        'report_numbers': [{'value': 'DESY-17-036'}],
        'dois': [{'value': '10.1007/978-3-319-15001-7'}],
    }

    engine = WorkflowEngine.from_uuid(start('article', [record_update]))
    halted = engine.objects[0]
    extra_data = halted.extra_data

    conflicts = extra_data.get('conflicts')

    assert halted.status == ObjectStatus.HALTED
    assert len(conflicts) == 1
    assert extra_data.get('callback_url') is not None
    assert extra_data.get('is-update') is True
Esempio n. 56
0
def test_merge_without_conflicts_handles_update_without_acquisition_source_and_acts_as_rootless(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check an update without an acquisition source merges and saves no root.

    The workflow must complete as an update with no conflicts and no
    callback URL, and no workflow record source may be stored for the
    record afterwards.
    """
    with patch(
            'inspire_json_merger.config.PublisherOnArxivOperations.conflict_filters',
            ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        # Work on a copy: updating the shared module-level constant in place
        # would leak this test's ``arxiv_eprints`` into every other test
        # that uses it.
        record_update = dict(RECORD_WITHOUT_ACQUISITION_SOURCE_AND_NO_CONFLICTS)
        record_update['arxiv_eprints'] = factory.record_metadata.json.get(
            'arxiv_eprints')

        eng_uuid = start('article', [record_update])

        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]

        conflicts = obj.extra_data.get('conflicts')

        assert obj.status == ObjectStatus.COMPLETED
        assert not conflicts

        assert obj.extra_data.get('callback_url') is None
        assert obj.extra_data.get('is-update') is True

        # The source is unknown, so no new root is saved.
        roots = read_all_wf_record_sources(factory.record_metadata.id)
        assert not roots
Esempio n. 57
0
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check a record matching a rejected workflow is not auto-approved.

    A completed, rejected workflow object is created for a record whose
    only arXiv category is ``q-bio.GN``. A new workflow for the same record
    must complete without auto-approval and list at least one previously
    rejected match.
    """
    record, categories = core_record()
    record['arxiv_eprints'][0]['categories'] = ['q-bio.GN']

    rejected = workflow_object_class.create(
        data=record,
        status=ObjectStatus.COMPLETED,
        data_type='hep',
    )
    rejected.extra_data['approved'] = False  # reject it
    rejected.save()
    es.indices.refresh('holdingpen-hep')

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, config_overrides):
            workflow_id = build_workflow(record).id
            eng_uuid = start('article', object_id=workflow_id)
            engine = WorkflowEngine.from_uuid(eng_uuid)
            processed = engine.processed_objects[0]
            extra_data = processed.extra_data

            assert not extra_data['auto-approved']
            assert len(extra_data['previously_rejected_matches']) > 0
            assert processed.status == ObjectStatus.COMPLETED
Esempio n. 58
0
def test_update_exact_matched_goes_trough_the_workflow(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """Check an update exactly matching a stored record runs to completion.

    The workflow must have no holdingpen matches, be flagged as an
    exact-matched, approved update of the record's control number, and
    finish COMPLETED.
    """
    control_number = record_from_db.get("control_number")

    workflow_id = build_workflow(record_from_db).id
    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    reloaded = workflow_object_class.get(engine.objects[0].id)
    extra_data = reloaded.extra_data

    # Nothing in the holdingpen interferes with this workflow.
    assert extra_data["holdingpen_matches"] == []
    assert extra_data["previously_rejected"] is False
    assert not extra_data.get("stopped-matched-holdingpen-wf")

    # The record is recognised as an exact-matched, approved update.
    assert extra_data["is-update"]
    assert extra_data["exact-matched"]
    assert extra_data["matches"]["exact"] == [control_number]
    assert extra_data["matches"]["approved"] == control_number
    assert extra_data["approved"]
    assert reloaded.status == ObjectStatus.COMPLETED
Esempio n. 59
0
def test_regression_non_relevant_update_is_not_rejected_and_gets_merged(
    mocked_api_request_magpie,
    mocked_beard_api,
    mock_is_record_relevant,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check an update skips the relevance check and is auto-approved and merged.

    The stored record's own JSON is submitted as the update. The
    ``mock_is_record_relevant`` mock must never be called, and the workflow
    must be flagged as an approved, auto-approved, merged update.
    """
    factory = TestRecordMetadata.create_from_file(
        __name__, 'merge_record_arxiv.json', index_name='records-hep'
    )
    stored_json = factory.record_metadata.json

    update_workflow_id = build_workflow(stored_json).id
    eng_uuid = start('article', object_id=update_workflow_id)
    processed = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    mock_is_record_relevant.assert_not_called()

    extra_data = processed.extra_data
    assert extra_data.get('is-update') is True
    assert extra_data['approved'] is True
    assert extra_data['auto-approved'] is True
    assert extra_data['merged'] is True
Esempio n. 60
0
def test_merge_with_conflicts_callback_url_and_resolve(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Halt on a merge conflict, resolve it via the callback, then complete.

    With the merger's ``conflict_filters`` patched to
    ``['acquisition_source.source']``, running ``RECORD_WITH_CONFLICTS``
    through the article workflow is expected to halt with exactly one
    conflict and expose the resolve-merge-conflicts callback URL. After the
    conflict is resolved and the payload is PUT to that URL, the workflow
    must complete as an approved, merged update, and the record source read
    back for ``'arxiv'`` must equal ``RECORD_WITH_CONFLICTS``.
    """
    conflict_filters_patch = patch(
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
        ['acquisition_source.source'],
    )

    with conflict_filters_patch:
        record_factory = TestRecordMetadata.create_from_file(
            __name__,
            'merge_record_arxiv.json',
            index_name='records-hep',
        )

        workflow = build_workflow(RECORD_WITH_CONFLICTS)
        engine_uuid = start('article', object_id=workflow.id)

        wf_object = WorkflowEngine.from_uuid(engine_uuid).objects[0]
        found_conflicts = wf_object.extra_data.get('conflicts')
        callback_url = wf_object.extra_data.get('callback_url')

        # The workflow must be waiting for a human to resolve the conflict.
        assert wf_object.status == ObjectStatus.HALTED
        assert 'http://localhost:5000/callback/workflows/resolve_merge_conflicts' == callback_url
        assert len(found_conflicts) == 1

        assert wf_object.extra_data.get('is-update') is True
        assert wf_object.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # resolve conflicts: take the value from the stored record and drop
        # the conflict list before sending the object back.
        stored_json = record_factory.record_metadata.json
        wf_object.data['number_of_pages'] = stored_json.get('number_of_pages')
        del wf_object.extra_data['conflicts']

        body = json.dumps({
            'id': wf_object.id,
            'metadata': wf_object.data,
            '_extra_data': wf_object.extra_data,
        })

        with workflow_app.test_client() as client:
            response = client.put(
                callback_url,
                data=body,
                content_type='application/json',
            )
        assert response.status_code == 200

        # Reload the object from the engine to observe the resumed workflow.
        wf_object = WorkflowEngine.from_uuid(engine_uuid).objects[0]
        remaining_conflicts = wf_object.extra_data.get('conflicts')

        assert wf_object.status == ObjectStatus.COMPLETED
        assert remaining_conflicts is None

        for flag in ('approved', 'is-update', 'merged'):
            assert wf_object.extra_data.get(flag) is True

        new_root = read_wf_record_source(record_factory.record_metadata.id, 'arxiv')
        assert new_root.json == RECORD_WITH_CONFLICTS