def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Check that an update matching a rejected workflow is flagged as such.

    A first ``article`` workflow is run and rejected. The same record is then
    submitted again with a new title, and the resulting workflow object is
    expected to carry the ``previously_rejected`` markers pointing at the
    first workflow.
    """
    record = generate_record()

    # First submission: run the workflow and reject the record.
    first_engine = WorkflowEngine.from_uuid(start('article', [record]))
    rejected_id = first_engine.objects[0].id
    rejected_wf = workflow_object_class.get(rejected_id)
    rejected_wf.extra_data["approved"] = False
    rejected_wf.continue_workflow()

    # Reload to observe the state after the workflow has continued.
    rejected_wf = workflow_object_class.get(rejected_id)
    assert rejected_wf.status == ObjectStatus.COMPLETED
    assert rejected_wf.extra_data.get('approved') is False

    es.indices.refresh('holdingpen-hep')

    # Second submission: same record with a different title. It matches the
    # rejected workflow in the holdingpen and stops because of that match.
    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    second_engine = WorkflowEngine.from_uuid(start('article', [record]))
    update_wf = second_engine.objects[0]

    assert update_wf.extra_data['already-in-holding-pen'] is False
    assert update_wf.extra_data['previously_rejected'] is True
    assert update_wf.extra_data['previously_rejected_matches'] == [rejected_id]
Exemple #2
0
def test_update_exact_matched_goes_trough_the_workflow(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """Check the flags left on a workflow run for a record already in the DB.

    The workflow is expected to complete as an approved, exactly-matched
    update with no holdingpen matches.
    """
    workflow_id = build_workflow(record_from_db).id
    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    wf = workflow_object_class.get(engine.objects[0].id)

    # Nothing pending or rejected in the holdingpen for this record.
    assert wf.extra_data["holdingpen_matches"] == []
    assert wf.extra_data["previously_rejected"] is False
    assert not wf.extra_data.get("stopped-matched-holdingpen-wf")

    # The record is recognised as an exact-matched update of the DB record.
    assert wf.extra_data["is-update"]
    assert wf.extra_data["exact-matched"]
    matches = wf.extra_data["matches"]
    assert matches["exact"] == [record_from_db.get("control_number")]
    assert matches["approved"] == record_from_db.get("control_number")

    assert wf.extra_data["approved"]
    assert wf.status == ObjectStatus.COMPLETED
Exemple #3
0
def start_edit_article_workflow(recid):
    """Start an ``edit_article`` workflow for a record and redirect to it.

    If the request referrer is an RT ticket link, the ticket id is stored in
    the workflow's ``extra_data`` under ``curation_ticket_id``.

    Args:
        recid: identifier passed to ``get_db_record('lit', recid)``.

    Returns:
        A 302 redirect to ``WORKFLOWS_EDITOR_API_URL`` followed by the id of
        the newly created workflow object.

    Raises:
        CallbackRecordNotFoundError: if the record lookup raises
            ``RecordGetterError``.
    """
    try:
        record = get_db_record('lit', recid)
    except RecordGetterError:
        raise CallbackRecordNotFoundError(recid)

    record_permission = RecordPermission.create(action='update', record=record)
    if not record_permission.can():
        abort(403, record_permission)
    # has to be done before start() since it is detaching this session
    user_id = current_user.get_id()
    eng_uuid = start('edit_article', data=record)
    workflow_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
    workflow = workflow_object_class.get(workflow_id)
    workflow.id_user = user_id
    if request.referrer:
        # Escape the whole base URL so that every character (not only '?')
        # is matched literally, and use a raw string for the regex part.
        ticket_pattern = re.escape(get_rt_link_for_ticket('')) + r'(?P<ticket_id>\d+)'
        ticket_match = re.match(ticket_pattern, request.referrer)
        if ticket_match:
            ticket_id = int(ticket_match.group('ticket_id'))
            workflow.extra_data['curation_ticket_id'] = ticket_id

    workflow.save()
    db.session.commit()
    url = "{}{}".format(current_app.config['WORKFLOWS_EDITOR_API_URL'], workflow_id)
    return redirect(location=url, code=302)
Exemple #4
0
def test_workflow_do_not_changes_to_hidden_if_record_authors_do_not_have_interesting_affiliations(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that an approved record keeps only the ``Literature`` collection.

    The collections are read from the JSON body of the first request
    recorded by ``mocked_external_services``.
    """
    workflow_id = build_workflow(generate_record()).id
    feature_config = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000",
    }

    with patch.dict(workflow_app.config, feature_config):
        start("article", object_id=workflow_id)

        # Approve the record and resume the workflow synchronously.
        approved_wf = workflow_object_class.get(workflow_id)
        approved_wf.extra_data['approved'] = True
        approved_wf.save()
        approved_wf.continue_workflow(delayed=False)

    first_request = mocked_external_services.request_history[0]
    collections_in_record = first_request.json()['_collections']

    for hidden_collection in ("CDS Hidden", "HAL Hidden", "Fermilab"):
        assert hidden_collection not in collections_in_record
    assert ["Literature"] == collections_in_record
Exemple #5
0
def webcoll_callback():
    """Resume the workflows waiting on the record ids reported by webcoll.

    The record ids are read from the ``recids`` field of the request form.
    For every id found in the ``pending_records`` cache entry, the matching
    workflow object gets its ``url`` and ``recid`` set, is saved and
    continued, and the id is removed from the cache.

    Returns:
        A JSON response ``{"result": "success"}``.
    """
    recids = dict(request.form).get('recids', [])
    pending_records = current_cache.get("pending_records") or dict()

    for rid in recids:
        if rid not in pending_records:
            continue

        workflow_object = workflow_object_class.get(pending_records[rid])
        record_url = join(_get_base_url(), 'record', str(rid))
        workflow_object.extra_data['url'] = record_url
        workflow_object.extra_data['recid'] = rid
        workflow_object.save()
        db.session.commit()
        workflow_object.continue_workflow(delayed=True)

        # Persist the shrunken pending map right after each handled record.
        del pending_records[rid]
        cache_timeout = current_app.config["PENDING_RECORDS_CACHE_TIMEOUT"]
        current_cache.set(
            "pending_records", pending_records, timeout=cache_timeout
        )

    return jsonify({"result": "success"})
Exemple #6
0
def test_workflows_halts_on_multiple_exact_matches(workflow_app):
    """Check that the workflow halts when an update exactly matches two records."""
    # Record from arxiv with just arxiv ID in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_arxiv.json", index_name="records-hep"
    )

    # Record from publisher with just DOI in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_publisher.json", index_name="records-hep"
    )

    path = pkg_resources.resource_filename(
        __name__, "fixtures/multiple_matches_arxiv_update.json"
    )
    # Use a context manager so the fixture file handle is always closed.
    with open(path) as fixture_file:
        update_from_arxiv = json.load(fixture_file)

    # An update from arxiv with the same arxiv and DOI as above records
    workflow_id = build_workflow(update_from_arxiv).id
    start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)

    assert len(set(obj.extra_data["matches"]["exact"])) == 2

    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data["_action"] == "resolve_multiple_exact_matches"
def test_responses_with_etag(workflow_app):
    """Check conditional GET handling on the holdingpen API endpoint.

    Requests carrying the returned ``Last-Modified`` or ``ETag`` values are
    expected to yield 304, while a non-matching ``If-None-Match`` yields 200.
    """
    factory = TestRecordMetadata.create_from_kwargs(
        json={'titles': [{'title': 'Etag version'}]}
    )

    wf = workflow_object_class.get(
        build_workflow(factory.record_metadata.json).id
    )
    wf.save()
    db.session.commit()

    workflow_url = '/api/holdingpen/{}'.format(wf.id)

    with workflow_app.test_client() as client:
        login_user_via_session(client, email='*****@*****.**')

        # Unconditional request: provides the validators used below.
        response = client.get(workflow_url)
        assert response.status_code == 200
        etag = response.headers['ETag']
        last_modified = response.headers['Last-Modified']

        conditional_requests = [
            ({'If-Modified-Since': last_modified}, 304),
            ({'If-None-Match': etag}, 304),
            ({'If-None-Match': 'Jessica Jones'}, 200),
        ]
        for headers, expected_status in conditional_requests:
            response = client.get(workflow_url, headers=headers)
            assert response.status_code == expected_status
Exemple #8
0
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that a halted workflow completes as rejected when not approved."""
    record = generate_record()
    service_urls = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    workflow_uuid, _eng, halted_wf = get_halted_workflow(
        app=workflow_app,
        extra_config=service_urls,
        record=record,
    )

    # Store the rejection decision before resuming the workflow.
    halted_wf.extra_data["approved"] = False
    halted_wf.save()
    db.session.commit()

    # Reload the object through the engine and let it continue.
    engine = WorkflowEngine.from_uuid(workflow_uuid)
    resumed_wf = engine.processed_objects[0]
    resumed_id = resumed_wf.id
    resumed_wf.continue_workflow()

    # The workflow finished and the record stayed rejected.
    final_wf = workflow_object_class.get(resumed_id)
    assert final_wf.status == ObjectStatus.COMPLETED
    assert final_wf.extra_data["approved"] is False
Exemple #9
0
def test_validation_error_callback_with_validation_error(workflow_app):
    """Check the validation callback response for a record that is still invalid."""
    invalid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
        "preprint_date": "Jessica Jones",
    }
    workflow_id = build_workflow(invalid_record).id

    # Starting the workflow fails validation and leaves the object in error.
    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    failed_wf = workflow_object_class.get(workflow_id)
    assert failed_wf.status == ObjectStatus.ERROR

    # Send the unchanged (still invalid) data back through the callback.
    response = do_validation_callback(
        workflow_app, failed_wf.id, failed_wf.data, failed_wf.extra_data
    )
    payload = json.loads(response.get_data())

    assert response.status_code == 400
    assert "VALIDATION_ERROR" == payload["error_code"]
    assert "Validation error." == payload["message"]

    returned_extra_data = payload["workflow"]["_extra_data"]
    assert returned_extra_data["callback_url"]
    assert len(returned_extra_data["validation_errors"]) == 1
Exemple #10
0
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """Check the error state and callback data left by an invalid record."""
    invalid_record = {
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }
    workflow_id = build_workflow(invalid_record).id

    with pytest.raises(ValidationError):
        start('article', object_id=workflow_id)

    failed_wf = workflow_object_class.get(workflow_id)

    # The object is in error and carries a message about the failure.
    assert failed_wf.status == ObjectStatus.ERROR
    assert '_error_msg' in failed_wf.extra_data
    assert 'required' in failed_wf.extra_data['_error_msg']

    # A callback URL for resolving the validation errors is recorded.
    expected_url = 'http://localhost:5000/callback/workflows/resolve_validation_errors'
    assert expected_url == failed_wf.extra_data['callback_url']

    # Each validation error exposes at least a message and a path.
    assert failed_wf.extra_data['validation_errors']
    first_error = failed_wf.extra_data['validation_errors'][0]
    assert 'message' in first_error
    assert 'path' in first_error
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that a halted workflow completes as rejected when not approved."""
    record = generate_record()
    service_urls = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    workflow_uuid, _eng, halted_wf = get_halted_workflow(
        app=workflow_app,
        extra_config=service_urls,
        record=record,
    )

    # Store the rejection decision before resuming the workflow.
    halted_wf.extra_data["approved"] = False
    halted_wf.save()
    db.session.commit()

    # Reload the object through the engine and let it continue.
    engine = WorkflowEngine.from_uuid(workflow_uuid)
    resumed_wf = engine.processed_objects[0]
    resumed_id = resumed_wf.id
    resumed_wf.continue_workflow()

    # The workflow finished and the record stayed rejected.
    final_wf = workflow_object_class.get(resumed_id)
    assert final_wf.status == ObjectStatus.COMPLETED
    assert final_wf.extra_data["approved"] is False
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """Check that a matched halted workflow is completed and marked as stopped."""
    # A workflow has to be run first so that the object gets a workflow
    # definition and a uuid assigned.
    seed_wf = workflow_object_class.create(data_type='hep', **simple_record)
    engine = WorkflowEngine.from_uuid(start('article', object_id=seed_wf.id))

    # Force the processed object into HALTED so it counts as non-completed.
    halted_wf = engine.processed_objects[0]
    halted_wf.status = ObjectStatus.HALTED
    halted_wf.save()
    halted_id = halted_wf.id
    es.indices.refresh('holdingpen-hep')

    # A second object built from the same record should match the first.
    incoming_wf = WorkflowObject.create(data_type='hep', **simple_record)
    incoming_id = incoming_wf.id

    match_non_completed_wf_in_holdingpen(incoming_wf, None)
    assert incoming_wf.extra_data['holdingpen_matches'] == [halted_id]

    stop_matched_holdingpen_wfs(incoming_wf, None)

    stopped_wf = workflow_object_class.get(halted_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == incoming_id
Exemple #13
0
def stop_matched_holdingpen_wfs(obj, eng):
    """Stop every holdingpen workflow matched by the current one.

    Each workflow listed in ``obj.extra_data['holdingpen_matches']`` has the
    steps of its engine replaced by a short sequence that marks it with
    ``'stopped-by-wf'`` (set to the id of the current workflow object, for
    traceability) and then stops processing; that sequence is executed
    immediately.

    When an article is harvested twice, this stops the first workflow so that
    the current one, which has the latest metadata, is the one processed.

    Args:
        obj: a workflow object.
        eng: a workflow engine (not used here).

    Returns:
        None
    """
    current_wf_id = int(obj.id)
    stopping_steps = [mark('stopped-by-wf', current_wf_id), stop_processing]

    obj.save()

    for matched_id in obj.extra_data['holdingpen_matches']:
        matched_wf = workflow_object_class.get(matched_id)
        matched_eng = WorkflowEngine.from_uuid(matched_wf.id_workflow)

        # Swap the matched workflow's steps for the stopping ones and run them.
        matched_eng.callbacks.replace(stopping_steps)
        matched_eng.process([matched_wf])
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow that is manually accepted.

    After acceptance the workflow is expected to stay in ``WAITING`` through
    the robotupload callback and to reach ``COMPLETED`` only after the
    webcoll callback.
    """
    # The docstring must be the first statement of the function; placed after
    # an assignment it would be a discarded string expression.
    record = generate_record()

    workflow_uuid, eng, obj = get_halted_workflow(app=workflow_app, record=record)

    do_accept_core(app=workflow_app, workflow_id=obj.id)

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING

    do_robotupload_callback(app=workflow_app, workflow_id=obj.id, recids=[12345])

    obj = workflow_object_class.get(obj.id)
    assert obj.status == ObjectStatus.WAITING

    do_webcoll_callback(app=workflow_app, recids=[12345])

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is True
Exemple #15
0
def continue_workflow_callback():
    """Handle callback to continue a workflow.

    Expects the request JSON to contain an object ID in the ``nonce`` field
    and, optionally, a ``results`` collection of dicts. For every result with
    a truthy ``success`` value, the result's ``recid`` is used to set ``url``
    and ``recid`` in the workflow object's ``extra_data``.

    Returns:
        A JSON response ``{"result": "success"}`` when the workflow object was
        found and continued, ``{"result": "failed"}`` otherwise (including
        when the request carries no JSON body or no ``nonce``).
    """
    # ``get_json()`` may return None (e.g. non-JSON request); treat that as an
    # empty payload so the request is answered with "failed" instead of
    # crashing with an AttributeError.
    request_data = request.get_json() or {}
    id_object = request_data.get("nonce", "")

    if id_object:
        # Single lookup of "results": it is both iterated here and forwarded
        # to the workflow as ``callback_results``.
        callback_results = request_data.get("results", {})
        workflow_object = workflow_object_class.get(id_object)
        if workflow_object:
            for result in callback_results:
                status = result.get('success', False)
                if status:
                    recid = result.get('recid')
                    base_url = _get_base_url()
                    workflow_object.extra_data['url'] = join(
                        base_url,
                        'record',
                        str(recid)
                    )
                    workflow_object.extra_data['recid'] = recid
            # Will add the results to the engine extra_data column.
            workflow_object.save()
            db.session.commit()
            workflow_object.continue_workflow(
                delayed=True,
                callback_results=callback_results
            )
            return jsonify({"result": "success"})
    return jsonify({"result": "failed"})
def test_responses_with_etag(workflow_app):
    """Check conditional GET handling on the holdingpen API endpoint.

    Requests carrying the returned ``Last-Modified`` or ``ETag`` values are
    expected to yield 304, while a non-matching ``If-None-Match`` yields 200.
    """
    factory = TestRecordMetadata.create_from_kwargs(
        json={'titles': [{'title': 'Etag version'}]}
    )

    wf = workflow_object_class.get(
        build_workflow(factory.record_metadata.json).id
    )
    wf.save()
    db.session.commit()

    workflow_url = '/api/holdingpen/{}'.format(wf.id)

    with workflow_app.test_client() as client:
        login_user_via_session(client, email='*****@*****.**')

        # Unconditional request: provides the validators used below.
        response = client.get(workflow_url)
        assert response.status_code == 200
        etag = response.headers['ETag']
        last_modified = response.headers['Last-Modified']

        conditional_requests = [
            ({'If-Modified-Since': last_modified}, 304),
            ({'If-None-Match': etag}, 304),
            ({'If-None-Match': 'Jessica Jones'}, 200),
        ]
        for headers, expected_status in conditional_requests:
            response = client.get(workflow_url, headers=headers)
            assert response.status_code == expected_status
def test_validation_error_callback_with_validation_error(workflow_app):
    """Check the validation callback response for a record that is still invalid."""
    invalid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
        "preprint_date": "Jessica Jones",
    }
    workflow_id = build_workflow(invalid_record).id

    # Starting the workflow fails validation and leaves the object in error.
    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    failed_wf = workflow_object_class.get(workflow_id)
    assert failed_wf.status == ObjectStatus.ERROR

    # Send the unchanged (still invalid) data back through the callback.
    response = do_validation_callback(
        workflow_app, failed_wf.id, failed_wf.data, failed_wf.extra_data
    )
    payload = json.loads(response.get_data())

    assert response.status_code == 400
    assert "VALIDATION_ERROR" == payload["error_code"]
    assert "Validation error." == payload["message"]

    returned_extra_data = payload["workflow"]["_extra_data"]
    assert returned_extra_data["callback_url"]
    assert len(returned_extra_data["validation_errors"]) == 1
def resolve_missmatch_version_with_legacy(workflow_id, legacy_revision):
    """Revert record revision to be the same with the legacy version.

    Looks up the record referenced by the workflow's ``control_number``,
    replaces its content with the revision whose ``legacy_version`` equals
    ``legacy_revision``, then restarts the workflow from its first step.

    Example ::
        resolve_missmatch_version_with_legacy(1236029, '20180926071008.0')

    Args:
        workflow_id: id of the workflow object to fix and restart.
        legacy_revision: value compared against each revision's
            ``legacy_version``.

    Returns:
        None. Progress is reported on standard output.
    """
    obj = workflow_object_class.get(workflow_id)
    record = get_db_record('lit', obj.data['control_number'])
    revisions = [
        revision for revision in record.revisions
        if revision.get('legacy_version') == legacy_revision
    ]

    if not revisions:
        print('revision {} not found'.format(legacy_revision))
        return None

    print('revision found.')
    # If several revisions match, the last one in iteration order is used.
    revision = revisions.pop()
    record.clear()
    record.update(revision, skip_files=True)
    record.commit()
    # Rewind the workflow to its first step before continuing it.
    obj.callback_pos = [0]
    obj.save()
    db.session.commit()
    response = obj.continue_workflow(delayed=True)
    # Single-argument call form: valid on both Python 2 and Python 3, and
    # consistent with the other print calls in this function.
    print('Workflow {} currently in status {}'.format(workflow_id,
                                                      response.status))
Exemple #19
0
def stop_matched_holdingpen_wfs(obj, eng):
    """Stop every holdingpen workflow matched by the current one.

    Each workflow listed in ``obj.extra_data['holdingpen_matches']`` has the
    steps of its engine replaced by a short sequence that marks it with
    ``'stopped-by-wf'`` (set to the id of the current workflow object, for
    traceability) and then stops processing; that sequence is executed
    immediately.

    When an article is harvested twice, this stops the first workflow so that
    the current one, which has the latest metadata, is the one processed.

    Args:
        obj: a workflow object.
        eng: a workflow engine (not used here).

    Returns:
        None
    """
    current_wf_id = int(obj.id)
    stopping_steps = [mark('stopped-by-wf', current_wf_id), stop_processing]

    obj.save()

    for matched_id in obj.extra_data['holdingpen_matches']:
        matched_wf = workflow_object_class.get(matched_id)
        matched_eng = WorkflowEngine.from_uuid(matched_wf.id_workflow)

        # Swap the matched workflow's steps for the stopping ones and run them.
        matched_eng.callbacks.replace(stopping_steps)
        matched_eng.process([matched_wf])
def test_conflict_creates_ticket(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check that a merge conflict opens a ticket which is resolved on continue.

    The first recorded external request is expected to create the conflict
    ticket; after the workflow is continued, the second one is expected to
    set that ticket to resolved.
    """
    conflict_filters_target = (
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    )
    with patch(conflict_filters_target, ['acquisition_source.source']):
        TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep'
        )
        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id

        start('article', object_id=update_workflow_id)

        wf = workflow_object_class.get(update_workflow_id)
        history = mocked_external_services.request_history

        # Ticket creation: only the beginning of the request body is compared.
        ticket_template = u'content=Queue%3A+HEP_conflicts%0AText%3A+Merge+conflict+needs+to+be+resolved.%0A++%0A++https%3A%2F%2Flocalhost%3A5000%2Feditor%2Fholdingpen%2F{wf_id}%0ASubject%3A+arXiv%3A1703.04802+%28%23None%29%0Aid%3A+ticket%2Fnew%0ACF'
        expected_ticket = ticket_template.format(wf_id=wf.id)
        assert history[0].text.startswith(expected_ticket)
        assert wf.extra_data['conflict-ticket-id']

        close_url_template = 'http://rt.inspire/ticket/{ticket_id}/edit'
        expected_ticket_close_url = close_url_template.format(
            ticket_id=wf.extra_data['conflict-ticket-id']
        )

        wf.continue_workflow()

        # Ticket resolution after the workflow continues.
        close_request = mocked_external_services.request_history[1]
        assert close_request.url == expected_ticket_close_url
        assert close_request.text == u'content=Status%3A+resolved'
Exemple #21
0
def start_edit_article_workflow(recid):
    """Start an ``edit_article`` workflow for a record and redirect to it.

    If the request referrer is an RT ticket link, the ticket id is stored in
    the workflow's ``extra_data`` under ``curation_ticket_id`` and the
    workflow is saved.

    Args:
        recid: identifier passed to ``get_db_record('lit', recid)``.

    Returns:
        A 302 redirect to ``WORKFLOWS_EDITOR_API_URL`` followed by the id of
        the newly created workflow object.

    Raises:
        CallbackRecordNotFoundError: if the record lookup raises
            ``RecordGetterError``.
    """
    try:
        record = get_db_record('lit', recid)
    except RecordGetterError:
        raise CallbackRecordNotFoundError(recid)

    record_permission = RecordPermission.create(action='update', record=record)
    if not record_permission.can():
        abort(403, record_permission)

    eng_uuid = start('edit_article', data=record)
    workflow_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
    workflow = workflow_object_class.get(workflow_id)

    if request.referrer:
        # Escape the whole base URL so that every character (not only '?')
        # is matched literally, and use a raw string for the regex part.
        ticket_pattern = re.escape(get_rt_link_for_ticket('')) + r'(?P<ticket_id>\d+)'
        ticket_match = re.match(ticket_pattern, request.referrer)
        if ticket_match:
            ticket_id = int(ticket_match.group('ticket_id'))
            workflow.extra_data['curation_ticket_id'] = ticket_id
            workflow.save()
            db.session.commit()

    url = "{}{}".format(current_app.config['WORKFLOWS_EDITOR_API_URL'],
                        workflow_id)
    return redirect(location=url, code=302)
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """Check that an invalid record leaves the workflow object in error."""
    invalid_record = {
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    created_wf = workflow_object_class.create(
        data=invalid_record, data_type='hep', id_user=1
    )
    obj_id = created_wf.id

    # Validation is expected to fail as soon as the workflow is started.
    with pytest.raises(ValidationError):
        start('article', invalid_record, obj_id)

    failed_wf = workflow_object_class.get(obj_id)

    assert failed_wf.status == ObjectStatus.ERROR
    assert '_error_msg' in failed_wf.extra_data
    assert 'required' in failed_wf.extra_data['_error_msg']
Exemple #23
0
def _continue_workflow(workflow_id, recid, result=None):
    """Fill in the record details on a workflow object and continue it.

    Stores the record url, the record id (also as ``control_number`` in the
    object data) and the callback result, then continues the workflow in
    delayed mode.

    :return: True if succeeded, False if the specified workflow id does not
        exist.
    """
    if result is None:
        result = {}
    base_url = _get_base_url()

    try:
        workflow_object = workflow_object_class.get(workflow_id)
    except WorkflowsMissingObject:
        current_app.logger.error(
            'No workflow object with the id %s could be found.',
            workflow_id,
        )
        return False

    extra_data = workflow_object.extra_data
    extra_data['url'] = join(base_url, 'record', str(recid))
    extra_data['recid'] = recid
    workflow_object.data['control_number'] = recid
    extra_data['callback_result'] = result

    # Persist the changes before the workflow is resumed.
    workflow_object.save()
    db.session.commit()
    workflow_object.continue_workflow(delayed=True)

    return True
Exemple #24
0
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """Check that a matched halted workflow is completed and marked as stopped."""
    # A workflow has to be run first so that the object gets a workflow
    # definition and a uuid assigned.
    seed_wf = workflow_object_class.create(data_type='hep', **simple_record)
    engine = WorkflowEngine.from_uuid(start('article', object_id=seed_wf.id))

    # Force the processed object into HALTED so it counts as non-completed.
    halted_wf = engine.processed_objects[0]
    halted_wf.status = ObjectStatus.HALTED
    halted_wf.save()
    halted_id = halted_wf.id
    current_search.flush_and_refresh('holdingpen-hep')

    # A second object built from the same record should match the first.
    incoming_wf = WorkflowObject.create(data_type='hep', **simple_record)
    incoming_id = incoming_wf.id

    match_non_completed_wf_in_holdingpen(incoming_wf, None)
    assert incoming_wf.extra_data['holdingpen_matches'] == [halted_id]

    stop_matched_holdingpen_wfs(incoming_wf, None)

    stopped_wf = workflow_object_class.get(halted_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == incoming_id
Exemple #25
0
def update_existing_workflow_object(obj, eng):
    """Copy the data of ``obj`` into the first compatible matched object.

    The ids in ``obj.extra_data['holdingpen_ids']`` are tried in order. The
    first existing object whose ``acquisition_source`` method equals the one
    of ``obj`` is updated with ``obj.data`` and saved. If none qualifies
    (including when there are no ids), the error is logged and an
    ``Exception`` is raised.
    """
    from invenio_workflows import workflow_object_class

    holdingpen_ids = obj.extra_data.get("holdingpen_ids", [])
    for candidate_id in holdingpen_ids:
        candidate = workflow_object_class.get(candidate_id)

        # Both objects need an acquisition source to be comparable.
        if not (
                obj.data.get('acquisition_source') and
                candidate.data.get('acquisition_source')
        ):
            continue

        new_method = obj.data['acquisition_source'].get('method')
        old_method = candidate.data['acquisition_source'].get('method')
        if new_method != old_method:
            continue

        # Same method: update the existing object and stop searching.
        candidate.data.update(obj.data)
        candidate.save()
        break
    else:
        # The loop ended without finding any object to update.
        msg = "Cannot update old object, non valid ids: {0}".format(
            holdingpen_ids
        )
        obj.log.error(msg)
        raise Exception(msg)
Exemple #26
0
def _continue_workflow(workflow_id, recid, result=None):
    """Fill in the record details on a workflow object and continue it.

    Stores the record url, the record id and the callback result in the
    object's ``extra_data``, then continues the workflow in delayed mode.

    :return: True if succeeded, False if the specified workflow id does not
        exist.
    """
    if result is None:
        result = {}
    base_url = _get_base_url()

    try:
        workflow_object = workflow_object_class.get(workflow_id)
    except WorkflowsMissingObject:
        current_app.logger.error(
            'No workflow object with the id %s could be found.',
            workflow_id,
        )
        return False

    extra_data = workflow_object.extra_data
    extra_data['url'] = join(base_url, 'record', str(recid))
    extra_data['recid'] = recid
    extra_data['callback_result'] = result

    # Persist the changes before the workflow is resumed.
    workflow_object.save()
    db.session.commit()
    workflow_object.continue_workflow(delayed=True)

    return True
Exemple #27
0
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow that is manually accepted.

    After acceptance the workflow is expected to stay in ``WAITING`` through
    the robotupload callback and to reach ``COMPLETED`` only after the
    webcoll callback.
    """
    # The docstring must be the first statement of the function; placed after
    # an assignment it would be a discarded string expression.
    record = generate_record()

    workflow_uuid, eng, obj = get_halted_workflow(app=workflow_app, record=record)

    do_accept_core(app=workflow_app, workflow_id=obj.id)

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING

    do_robotupload_callback(app=workflow_app, workflow_id=obj.id, recids=[12345])

    obj = workflow_object_class.get(obj.id)
    assert obj.status == ObjectStatus.WAITING

    do_webcoll_callback(app=workflow_app, recids=[12345])

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is True
Exemple #28
0
def webcoll_callback():
    """Resume the workflows waiting on the record ids reported by webcoll.

    The record ids are read from the ``recids`` field of the request form.
    For every id found in the ``pending_records`` cache entry, the matching
    workflow object gets its ``url`` and ``recid`` set, is saved and
    continued, and the id is removed from the cache.

    Returns:
        A JSON response ``{"result": "success"}``.
    """
    recids = dict(request.form).get('recids', [])
    pending_records = current_cache.get("pending_records") or dict()

    for rid in recids:
        if rid not in pending_records:
            continue

        workflow_object = workflow_object_class.get(pending_records[rid])
        record_url = join(_get_base_url(), 'record', str(rid))
        workflow_object.extra_data['url'] = record_url
        workflow_object.extra_data['recid'] = rid
        workflow_object.save()
        db.session.commit()
        workflow_object.continue_workflow(delayed=True)

        # Persist the shrunken pending map right after each handled record.
        del pending_records[rid]
        cache_timeout = current_app.config["PENDING_RECORDS_CACHE_TIMEOUT"]
        current_cache.set(
            "pending_records", pending_records, timeout=cache_timeout
        )

    return jsonify({"result": "success"})
Exemple #29
0
def update_existing_workflow_object(obj, eng):
    """Copy the data of ``obj`` into the first compatible matched object.

    The ids in ``obj.extra_data['holdingpen_ids']`` are tried in order. The
    first existing object whose ``acquisition_source`` method equals the one
    of ``obj`` is updated with ``obj.data`` and saved. If none qualifies
    (including when there are no ids), the error is logged and an
    ``Exception`` is raised.
    """
    from invenio_workflows import workflow_object_class

    holdingpen_ids = obj.extra_data.get("holdingpen_ids", [])
    for candidate_id in holdingpen_ids:
        candidate = workflow_object_class.get(candidate_id)

        # Both objects need an acquisition source to be comparable.
        if not (
                obj.data.get('acquisition_source') and
                candidate.data.get('acquisition_source')
        ):
            continue

        new_method = obj.data['acquisition_source'].get('method')
        old_method = candidate.data['acquisition_source'].get('method')
        if new_method != old_method:
            continue

        # Same method: update the existing object and stop searching.
        candidate.data.update(obj.data)
        candidate.save()
        break
    else:
        # The loop ended without finding any object to update.
        msg = "Cannot update old object, non valid ids: {0}".format(
            holdingpen_ids
        )
        obj.log.error(msg)
        raise Exception(msg)
Exemple #30
0
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that an update from another source matches but is not stopped.

    A first workflow is left halted in the holdingpen. The same record is
    then submitted with a different title and acquisition source; the new
    workflow is expected to report the match without being flagged as
    previously rejected or as stopped.
    """
    record = generate_record()

    # First submission: ends up halted, i.e. pending in the holdingpen.
    first_engine_uuid = start('article', object_id=build_workflow(record).id)
    current_search.flush_and_refresh('holdingpen-hep')
    wf_to_match = WorkflowEngine.from_uuid(first_engine_uuid).objects[0].id
    pending_wf = workflow_object_class.get(wf_to_match)
    assert pending_wf.status == ObjectStatus.HALTED

    # Second submission: same record, different title and source.
    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    record['acquisition_source']['source'] = 'but not the source'
    second_engine_uuid = start('article', object_id=build_workflow(record).id)
    update_wf = WorkflowEngine.from_uuid(second_engine_uuid).objects[0]

    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data['holdingpen_matches'] == [wf_to_match]
    assert update_wf.extra_data['previously_rejected'] is False
    assert not update_wf.extra_data.get('stopped-matched-holdingpen-wf')
def test_workflows_halts_on_multiple_exact_matches(workflow_app):
    """An update that exactly matches two stored records must halt for resolution.

    Two records are loaded into ``records-hep`` and an arXiv update fixture
    is then run through the ``article`` workflow. The workflow is expected to
    report two distinct exact matches and halt with the
    ``resolve_multiple_exact_matches`` action.
    """
    # Record from arxiv with just arxiv ID in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_arxiv.json", index_name="records-hep"
    )

    # Record from publisher with just DOI in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_publisher.json", index_name="records-hep"
    )

    path = pkg_resources.resource_filename(
        __name__, "fixtures/multiple_matches_arxiv_update.json"
    )
    # Use a context manager so the fixture file handle is always closed.
    with open(path) as fixture_file:
        update_from_arxiv = json.load(fixture_file)

    # An update from arxiv with the same arxiv and DOI as above records
    workflow_id = build_workflow(update_from_arxiv).id
    start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)

    assert len(set(obj.extra_data["matches"]["exact"])) == 2

    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data["_action"] == "resolve_multiple_exact_matches"
Exemple #32
0
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check ``restart-count`` is 0 after the first run and 1 after a restart."""
    record = generate_record()

    with workflow_app.app_context():
        wf_id = build_workflow(record).id
        start('article', object_id=wf_id)

        wf = workflow_object_class.get(wf_id)

        # Counter as stored in the persistent marks and in extra_data itself.
        persistent_marks = wf.extra_data['source_data']['persistent_data']['marks']
        assert persistent_marks['restart-count'] == 0
        assert wf.extra_data['restart-count'] == 0

        # Rewind the workflow to its first task and run it again.
        wf.callback_pos = [0]
        wf.save()
        db.session.commit()

        start('article', object_id=wf_id)

        persistent_marks = wf.extra_data['source_data']['persistent_data']['marks']
        assert persistent_marks['restart-count'] == 1
        assert wf.extra_data['restart-count'] == 1
Exemple #33
0
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """A record raising ``ValidationError`` leaves the workflow in ERROR with details."""
    invalid_record = {
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }
    workflow_id = build_workflow(invalid_record).id

    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    failed_wf = workflow_object_class.get(workflow_id)

    # The workflow is flagged as errored and carries the error message.
    assert failed_wf.status == ObjectStatus.ERROR
    assert "_error_msg" in failed_wf.extra_data
    assert "required" in failed_wf.extra_data["_error_msg"]

    # A callback URL for resolving the validation errors is recorded.
    expected_url = "http://localhost:5000/callback/workflows/resolve_validation_errors"
    assert expected_url == failed_wf.extra_data["callback_url"]

    # Each validation error exposes a message and a path.
    assert failed_wf.extra_data["validation_errors"]
    first_error = failed_wf.extra_data["validation_errors"][0]
    assert "message" in first_error
    assert "path" in first_error
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A workflow matching a pending one from a different source is not stopped."""
    record = generate_record()

    # Create the workflow that will sit halted in the holdingpen.
    pending_engine_uuid = start('article', object_id=build_workflow(record).id)
    es.indices.refresh('holdingpen-hep')
    wf_to_match = WorkflowEngine.from_uuid(pending_engine_uuid).objects[0].id
    pending_wf = workflow_object_class.get(wf_to_match)
    assert pending_wf.status == ObjectStatus.HALTED

    # Resubmit the record with a changed title and a changed source:
    # it matches the pending workflow, yet it is allowed to continue.
    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    record['acquisition_source']['source'] = 'but not the source'
    update_engine_uuid = start('article', object_id=build_workflow(record).id)
    update_wf = WorkflowEngine.from_uuid(update_engine_uuid).objects[0]

    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data['holdingpen_matches'] == [wf_to_match]
    assert update_wf.extra_data['previously_rejected'] is False
    assert not update_wf.extra_data.get('stopped-matched-holdingpen-wf')
def test_harvesting_arxiv_workflow_accepted(
        mocked, db_only_app, record_oai_arxiv_plots):
    """Run an arXiv OAI record through ``article``, approve it and expect COMPLETED."""
    from invenio_workflows import (
        start, WorkflowEngine, ObjectStatus, workflow_object_class
    )
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # OAI arXiv XML -> MARCXML -> dict -> HEP JSON
    marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl")
    hep_json = hep.do(create_record(marcxml))

    workflow_uuid = None
    with db_only_app.app_context():
        workflow_uuid = start('article', [hep_json])

        halted_wf = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert halted_wf.status == ObjectStatus.HALTED
        assert halted_wf.data_type == "hep"

        # Both the PDF and the tarball must be attached
        for attachment in ("1407.7587.pdf", "1407.7587.tar.gz"):
            assert halted_wf.files[attachment]

        # The publication info must have been filled in
        pub_info = halted_wf.data.get('publication_info')
        assert pub_info
        first_pub_info = pub_info[0]
        assert first_pub_info
        assert first_pub_info.get('year') == "2014"
        assert first_pub_info.get('journal_title') == "J. Math. Phys."

        # No approval decision has been recorded so far
        assert "approved" not in halted_wf.extra_data

        # Resolve the halted workflow as an accepted core record
        # FIXME Should be accept, but record validation prevents us.
        halted_wf.remove_action()
        halted_wf.extra_data["approved"] = True
        halted_wf.extra_data["core"] = True
        halted_wf.save()

        db.session.commit()

    with db_only_app.app_context():
        resumed_wf = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
        resumed_id = resumed_wf.id
        resumed_wf.continue_workflow()

        # Reload the object and check the workflow ran to completion
        final_wf = workflow_object_class.get(resumed_id)
        assert final_wf.status == ObjectStatus.COMPLETED
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """The restart counter is 0 on the first run and 1 once the workflow is rerun."""
    record = generate_record()

    with workflow_app.app_context():
        workflow_id = build_workflow(record).id
        start('article', object_id=workflow_id)

        workflow = workflow_object_class.get(workflow_id)

        # First run: both copies of the counter start at zero.
        marks = workflow.extra_data['source_data']['persistent_data']['marks']
        assert marks['restart-count'] == 0
        assert workflow.extra_data['restart-count'] == 0

        # Send the workflow back to its first task and persist that.
        workflow.callback_pos = [0]
        workflow.save()
        db.session.commit()

        start('article', object_id=workflow_id)

        # Second run: both copies of the counter were bumped.
        marks = workflow.extra_data['source_data']['persistent_data']['marks']
        assert marks['restart-count'] == 1
        assert workflow.extra_data['restart-count'] == 1
def test_wf_rejects_automatically_when_previous_matched_wf_was_rejected(
    app,
    celery_app_with_context,
    celery_session_worker,
    generated_record
):
    """A resubmitted record completes on its own when its match was rejected before."""
    for flag in (
        'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY',
        'PRODUCTION_MODE',
        'USE_SIGNALS_ON_TIMEOUT',
    ):
        app.config[flag] = False
    record = generated_record

    # First workflow: run it until it halts, then reject it.
    first_workflow = build_workflow(record)
    first_workflow.save()
    db.session.commit()
    wf1_id = first_workflow.id

    start.delay('article', object_id=wf1_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.HALTED)

    rejected_wf = workflow_object_class.get(wf1_id)
    rejected_wf.extra_data["approved"] = False
    rejected_wf.continue_workflow(delayed=True)
    es.indices.refresh('holdingpen-hep')

    check_wf_state(wf1_id, ObjectStatus.COMPLETED)
    rejected_wf = workflow_object_class.get(wf1_id)
    assert rejected_wf.extra_data.get("approved") is False

    # Second workflow for the same record: expected to complete unattended.
    second_workflow = build_workflow(record)
    second_workflow.save()
    db.session.commit()
    wf2_id = second_workflow.id
    start.delay('article', object_id=wf2_id)
    es.indices.refresh("holdingpen-hep")

    check_wf_state(wf2_id, ObjectStatus.COMPLETED)
    second_wf = workflow_object_class.get(wf2_id)

    assert second_wf.extra_data["previously_rejected"] is True
    assert second_wf.extra_data["previously_rejected_matches"] == [wf1_id]
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_search,
    mocked_api_request_beard_block,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download,
    workflow_app,
    record,
):
    """Accept a halted workflow as core and drive it through both callbacks."""
    localhost_1234 = re.compile(
        'https?://localhost:1234.*',
    )
    with requests_mock.Mocker() as requests_mocker:
        # Real HTTP for the indexer and localhost hosts ...
        requests_mocker.register_uri(
            requests_mock.ANY,
            re.compile('.*(indexer|localhost).*'),
            real_http=True,
        )
        # ... and a canned reply for POSTs to localhost:1234.
        requests_mocker.register_uri(
            'POST',
            localhost_1234,
            text=u'[INFO]',
            status_code=200,
        )

        workflow_uuid, eng, obj = get_halted_workflow(
            app=workflow_app,
            extra_config={'PRODUCTION_MODE': False},
            record=record,
        )

        _do_accept_core(
            app=workflow_app,
            workflow_id=obj.id,
        )

        # After the acceptance the workflow waits for the upload callback.
        obj = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
        assert obj.status == ObjectStatus.WAITING

        upload_response = _do_robotupload_callback(
            app=workflow_app,
            workflow_id=obj.id,
            recids=[12345],
        )
        assert upload_response.status_code == 200

        # Still waiting, this time for the webcoll callback.
        obj = workflow_object_class.get(obj.id)
        assert obj.status == ObjectStatus.WAITING

        webcoll_response = _do_webcoll_callback(app=workflow_app, recids=[12345])
        assert webcoll_response.status_code == 200

        # Both callbacks done: the workflow has finished.
        obj = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
        assert obj.status == ObjectStatus.COMPLETED
def test_update_record_goes_through_api_version_of_store_record_without_issue(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """With REST record management on, an update issues a PUT to the literature API."""
    record = record_from_db
    workflow_id = build_workflow(record).id
    expected_control_number = record['control_number']
    expected_head_uuid = str(record.id)

    config_overrides = {
        "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
        "INSPIREHEP_URL": "http://web:8000"
    }
    api_reply = {
        'metadata': {
            'control_number': expected_control_number,
        },
        'id_': expected_head_uuid
    }
    with mock.patch.dict(workflow_app.config, config_overrides):
        with requests_mock.Mocker(real_http=True) as requests_mocker:
            # The URL is built from the patched config, so do it in here.
            literature_url = '{url}/literature/{cn}'.format(
                url=workflow_app.config.get("INSPIREHEP_URL"),
                cn=expected_control_number,
            )
            requests_mocker.register_uri(
                'PUT',
                literature_url,
                headers={'content-type': 'application/json'},
                status_code=200,
                json=api_reply,
            )
            eng_uuid = start("article", object_id=workflow_id)

            url_paths = []
            url_hostnames = []
            for request in requests_mocker.request_history:
                url_paths.append(request.path)
                url_hostnames.append(request.hostname)

            expected_path = "/literature/{cn}".format(
                cn=expected_control_number)
            assert 'web' in url_hostnames
            assert expected_path in url_paths

    obj_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
    updated_wf = workflow_object_class.get(obj_id)

    assert updated_wf.data['control_number'] == expected_control_number

    # No holdingpen interference, and the record is handled as an exact-matched update.
    assert updated_wf.extra_data["holdingpen_matches"] == []
    assert updated_wf.extra_data["previously_rejected"] is False
    assert not updated_wf.extra_data.get("stopped-matched-holdingpen-wf")
    assert updated_wf.extra_data["is-update"]
    assert updated_wf.extra_data["exact-matched"]
    assert updated_wf.extra_data["matches"]["exact"] == [record.get("control_number")]
    assert updated_wf.extra_data["matches"]["approved"] == record.get(
        "control_number")
    assert updated_wf.extra_data["approved"]
    assert updated_wf.status == ObjectStatus.COMPLETED
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A matching update halts itself and completes the pending holdingpen workflow."""
    record = generate_record()

    # Original submission: halted and waiting in the holdingpen.
    eng_uuid = start("article", object_id=build_workflow(record).id)
    es.indices.refresh("holdingpen-hep")
    old_wf = WorkflowEngine.from_uuid(eng_uuid).objects[0]
    old_wf_id = old_wf.id

    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data["previously_rejected"] is False

    # The very same record object, retitled, is submitted again.
    record["titles"][0]["title"] = "This is an update that will match the wf in the holdingpen"
    update_wf_id = build_workflow(record).id
    start("article", object_id=update_wf_id)
    es.indices.refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(update_wf_id)

    assert update_wf.status == ObjectStatus.HALTED
    assert update_wf.extra_data["previously_rejected"] is False
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data["stopped-matched-holdingpen-wf"] is True
    assert update_wf.extra_data["is-update"] is False

    # The original workflow was stopped by the update and is now completed.
    old_wf = workflow_object_class.get(old_wf_id)
    assert old_wf.extra_data['already-in-holding-pen'] is False
    assert old_wf.extra_data['previously_rejected'] is False
    assert old_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert old_wf.extra_data.get('approved') is None
    assert old_wf.extra_data['is-update'] is False
    assert old_wf.status == ObjectStatus.COMPLETED
Exemple #41
0
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """The pending workflow is completed once a matching update workflow arrives."""
    record = generate_record()

    # Run the first workflow and make it searchable in the holdingpen index.
    first_engine_uuid = start("article", object_id=build_workflow(record).id)
    current_search.flush_and_refresh("holdingpen-hep")
    pending_wf = WorkflowEngine.from_uuid(first_engine_uuid).objects[0]
    pending_wf_id = pending_wf.id

    assert pending_wf.status == ObjectStatus.HALTED
    assert pending_wf.extra_data["previously_rejected"] is False

    # Submit the same record object again, with only its title changed.
    record["titles"][0]["title"] = "This is an update that will match the wf in the holdingpen"
    update_wf_id = build_workflow(record).id
    start("article", object_id=update_wf_id)
    current_search.flush_and_refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(update_wf_id)

    # The update halts, flagged as having stopped a holdingpen match.
    assert update_wf.status == ObjectStatus.HALTED
    assert update_wf.extra_data["previously_rejected"] is False
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data["stopped-matched-holdingpen-wf"] is True
    assert update_wf.extra_data["is-update"] is False

    # The first workflow records who stopped it and ends up completed.
    pending_wf = workflow_object_class.get(pending_wf_id)
    assert pending_wf.extra_data['already-in-holding-pen'] is False
    assert pending_wf.extra_data['previously_rejected'] is False
    assert pending_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert pending_wf.extra_data.get('approved') is None
    assert pending_wf.extra_data['is-update'] is False
    assert pending_wf.status == ObjectStatus.COMPLETED
def test_harvesting_arxiv_workflow_accepted(mocked, small_app,
                                            record_oai_arxiv_plots):
    """Harvest an arXiv OAI record, mark it approved and core, expect COMPLETED."""
    from invenio_workflows import (start, WorkflowEngine, ObjectStatus,
                                   workflow_object_class)
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # Conversion chain: OAI arXiv XML -> MARCXML -> dict -> HEP JSON
    marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl")
    record_json = hep.do(create_record(marcxml))

    workflow_uuid = None
    with small_app.app_context():
        workflow_uuid = start('article', [record_json])

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        wf = engine.processed_objects[0]

        assert wf.status == ObjectStatus.HALTED
        assert wf.data_type == "hep"

        # The PDF and the tarball are expected among the attached files
        for name in ("1407.7587.pdf", "1407.7587.tar.gz"):
            assert wf.files[name]

        # Publication info is expected to be present on the record
        pub_info = wf.data.get('publication_info')
        assert pub_info
        publication = pub_info[0]
        assert publication
        assert publication.get('year') == "2014"
        assert publication.get('journal_title') == "J. Math. Phys."

        # Nobody has taken an approval decision yet
        assert "approved" not in wf.extra_data

        # Accept the record as core and persist the decision
        # FIXME Should be accept, but record validation prevents us.
        wf.remove_action()
        wf.extra_data["approved"] = True
        wf.extra_data["core"] = True
        wf.save()

        db.session.commit()

    with small_app.app_context():
        engine = WorkflowEngine.from_uuid(workflow_uuid)
        wf = engine.processed_objects[0]
        wf_id = wf.id
        wf.continue_workflow()

        # Fetch a fresh copy: the accepted workflow must be completed
        wf = workflow_object_class.get(wf_id)
        assert wf.status == ObjectStatus.COMPLETED
def test_manual_merge_existing_records(workflow_app):
    """Manually merge record 2 into record 1 and check the resulting links."""
    json_head = fake_record('This is the HEAD', 1)
    json_update = fake_record('While this is the update', 2)

    # differing ``core`` values are what produces the merge conflict
    json_head['core'] = True
    json_update['core'] = False

    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(json_update, skip_files=False)
    update.commit()
    head_id, update_id = head.id, update.id

    obj_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )

    do_resolve_manual_merge_wf(workflow_app, obj_id)

    # reload the object, otherwise Detached Instance Error
    merged_wf = workflow_object_class.get(obj_id)

    assert merged_wf.status == ObjectStatus.COMPLETED
    assert merged_wf.extra_data['approved'] is True
    assert merged_wf.extra_data['auto-approved'] is False

    # no stored root is expected for the head ...
    assert read_wf_record_source(head_id, 'arxiv') is None

    # ... nor for the update under its own source
    update_source = LiteratureReader(update).source
    assert read_wf_record_source(update_id, update_source) is None

    deleted_record = RecordMetadata.query.filter_by(id=update_id).one()
    latest_record = get_db_record('lit', 1)

    # the update record is flagged as deleted
    assert deleted_record.json['deleted'] is True

    # the surviving record points at the deleted one
    deleted_rec_ref = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [deleted_rec_ref] == latest_record['deleted_records']

    # the deleted record points back at the surviving one
    new_record_metadata = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert new_record_metadata == deleted_record.json['new_record']

    # apart from ``deleted_records`` the stored record equals the merge result
    del latest_record['deleted_records']
    assert latest_record == merged_wf.data
Exemple #44
0
def test_manual_merge_existing_records(workflow_app):
    """Resolve a manual merge of two stored records and verify the outcome."""
    json_head = fake_record('This is the HEAD', 1)
    json_head['core'] = True
    json_update = fake_record('While this is the update', 2)
    # opposite ``core`` values: this is the conflicting field
    json_update['core'] = False

    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(json_update, skip_files=False)
    update.commit()
    head_uuid = head.id
    update_uuid = update.id

    obj_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )

    do_resolve_manual_merge_wf(workflow_app, obj_id)

    # fetch it again, otherwise Detached Instance Error
    wf = workflow_object_class.get(obj_id)

    assert wf.status == ObjectStatus.COMPLETED
    assert wf.extra_data['approved'] is True
    assert wf.extra_data['auto-approved'] is False

    # neither record is expected to have a stored root
    head_root = read_wf_record_source(head_uuid, 'arxiv')
    assert head_root is None

    source_of_update = LiteratureReader(update).source
    update_root = read_wf_record_source(update_uuid, source_of_update)
    assert update_root is None

    deleted_record = RecordMetadata.query.filter_by(id=update_uuid).one()
    latest_record = get_db_record('lit', 1)

    assert deleted_record.json['deleted'] is True

    # cross-links between the surviving and the deleted record
    expected_deleted_ref = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [expected_deleted_ref] == latest_record['deleted_records']

    expected_new_ref = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert expected_new_ref == deleted_record.json['new_record']

    # ignoring ``deleted_records``, the stored head is the merged record
    del latest_record['deleted_records']
    assert latest_record == wf.data
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_search,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    workflow_app,
):
    """Test a full harvesting workflow that is manually accepted as core.

    The workflow is halted, accepted, and then driven through the
    robotupload and webcoll callbacks until it reaches COMPLETED.
    """
    # The docstring must be the first statement of the function; the record
    # is therefore generated right after it.
    record = generate_record()
    with requests_mock.Mocker() as requests_mocker:
        # Let requests to the indexer and localhost hosts hit the network.
        requests_mocker.register_uri(
            requests_mock.ANY,
            re.compile('.*(indexer|localhost).*'),
            real_http=True,
        )
        # Canned reply for POSTs to localhost:1234.
        requests_mocker.register_uri(
            'POST',
            re.compile('https?://localhost:1234.*', ),
            text=u'[INFO]',
            status_code=200,
        )

        workflow_uuid, eng, obj = get_halted_workflow(
            app=workflow_app,
            extra_config={'PRODUCTION_MODE': False},
            record=record,
        )

        do_accept_core(
            app=workflow_app,
            workflow_id=obj.id,
        )

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        assert obj.status == ObjectStatus.WAITING

        response = do_robotupload_callback(
            app=workflow_app,
            workflow_id=obj.id,
            recids=[12345],
        )
        assert response.status_code == 200

        obj = workflow_object_class.get(obj.id)
        assert obj.status == ObjectStatus.WAITING

        response = do_webcoll_callback(app=workflow_app, recids=[12345])
        assert response.status_code == 200

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
Exemple #46
0
def test_update_author_submit_with_required_fields(mock_start, workflow_app, mocked_external_services):
    # Posts an author update to '/workflows/authors' and checks that a
    # workflow object is created, that the "author" workflow is scheduled
    # through ``mock_start.delay`` and that the stored data equals ``expected``.
    #
    # NOTE(review): the '"$schema"' lines below look damaged by redaction --
    # the text between the schema URL and the e-mail address is missing, which
    # leaves both dict literals with unbalanced braces. Restore them from the
    # original fixture before running this test.
    data = {
        "data": {
            "$schema": "http://*****:*****@gmail.com",
                "datetime": "2019-02-04T10:06:34.695915",
                "method": "submitter",
                "submission_number": "None",
                "internal_uid": 1,
            },
            "name": {
                "value": "Martinez, Diegpo"
            },
            "status": "active",
            "control_number": 3
        }
    }
    with workflow_app.test_client() as client:
        # The endpoint is called with a bearer token taken from the app config.
        headers = {"Authorization": "Bearer " + current_app.config["AUTHENTICATION_TOKEN"]}
        response = client.post('/workflows/authors', data=json.dumps(data), content_type='application/json',
                               headers=headers)
        assert response.status_code == 200

        workflow_object_id = json.loads(response.data).get('workflow_object_id')
        assert workflow_object_id is not None

        obj = workflow_object_class.get(workflow_object_id)

        # The workflow must be started exactly once, for the created object.
        mock_start.delay.assert_called_once_with("author", object_id=workflow_object_id)

        expected = {
            "status": "active",
            "$schema": "http://*****:*****@gmail.com",
                "submission_number": "1",
                "datetime": "2019-02-04T10:06:34.695915"
            },
            "_collections": [
                "Authors"
            ],
            "name": {
                "value": "Martinez, Diegpo"
            },
            "control_number": 3
        }

        assert expected == obj.data

        # The workflow object is flagged as an update.
        assert obj.extra_data['is-update'] is True
def test_update_author_submit_with_required_fields(mock_start, workflow_app, mocked_external_services):
    # Sends an author update through the '/workflows/authors' endpoint, then
    # verifies the response, the scheduling of the "author" workflow via
    # ``mock_start.delay`` and the data stored on the workflow object.
    #
    # NOTE(review): both dict literals appear truncated around the '"$schema"'
    # entries (redacted text, unbalanced braces) -- confirm against the
    # original source and restore the missing keys before relying on this test.
    data = {
        "data": {
            "$schema": "http://*****:*****@gmail.com",
                "datetime": "2019-02-04T10:06:34.695915",
                "method": "submitter",
                "submission_number": "None",
                "internal_uid": 1,
            },
            "name": {
                "value": "Martinez, Diegpo"
            },
            "status": "active",
            "control_number": 3
        }
    }
    with workflow_app.test_client() as client:
        # Authenticate with the bearer token configured on the application.
        headers = {"Authorization": "Bearer " + current_app.config["AUTHENTICATION_TOKEN"]}
        response = client.post('/workflows/authors', data=json.dumps(data), content_type='application/json',
                               headers=headers)
        assert response.status_code == 200

        workflow_object_id = json.loads(response.data).get('workflow_object_id')
        assert workflow_object_id is not None

        obj = workflow_object_class.get(workflow_object_id)

        # Exactly one "author" workflow start is expected for this object.
        mock_start.delay.assert_called_once_with("author", object_id=workflow_object_id)

        expected = {
            "status": "active",
            "$schema": "http://*****:*****@gmail.com",
                "submission_number": "1",
                "datetime": "2019-02-04T10:06:34.695915"
            },
            "_collections": [
                "Authors"
            ],
            "name": {
                "value": "Martinez, Diegpo"
            },
            "control_number": 3
        }

        assert expected == obj.data

        # The submission is marked as an update on the workflow object.
        assert obj.extra_data['is-update'] is True
Exemple #48
0
def test_cli_delete_edit_article_workflows(app_cli_runner):
    """``delete_edit_article_older_than`` removes one of three prepared workflows."""
    old_date = datetime.datetime(2020, 7, 8, 12, 31, 8, 299777)
    recent_date = datetime.datetime(2020, 7, 11, 12, 31, 8, 299777)

    # Old edit_article workflow left in WAITING: the one expected to go.
    wf_to_be_deleted = build_workflow({}, data_type='hep')
    wf_to_be_deleted.save()
    start('edit_article', object_id=wf_to_be_deleted.id)
    wf_to_be_deleted = workflow_object_class.get(wf_to_be_deleted.id)
    wf_to_be_deleted.status = ObjectStatus.WAITING
    wf_to_be_deleted.created = old_date
    wf_to_be_deleted.save()

    # Equally old workflow, but in ERROR and never started as edit_article.
    wf_in_error = build_workflow({}, data_type='hep')
    wf_in_error.status = ObjectStatus.ERROR
    wf_in_error.extra_data["_error_msg"] = "Error in WebColl"
    wf_in_error.created = old_date
    wf_in_error.save()

    # WAITING edit_article workflow with the later creation date.
    recent_wf = build_workflow({}, data_type='hep')
    recent_wf.save()
    start('edit_article', object_id=recent_wf.id)
    recent_wf = workflow_object_class.get(recent_wf.id)
    recent_wf.status = ObjectStatus.WAITING
    recent_wf.created = recent_date
    recent_wf.save()

    indices = ['holdingpen-hep']

    # Before the command: three workflows in the index and in the DB.
    es.indices.refresh(indices)
    hits_before = es.search(indices)
    assert hits_before['hits']['total']['value'] == 3
    assert WorkflowObjectModel.query.count() == 3

    result = app_cli_runner.invoke(workflows,
                                   ['delete_edit_article_older_than'])

    assert "Found 1 workflows to delete older than 48 hours" in result.output_bytes

    # After the command: two are left, and the old WAITING one is gone.
    es.indices.refresh(indices)
    hits_after = es.search(indices)
    assert hits_after['hits']['total']['value'] == 2
    assert WorkflowObjectModel.query.count() == 2

    leftover = WorkflowObjectModel.query.filter_by(
        id=wf_to_be_deleted.id).one_or_none()
    assert leftover is None
def test_authors_workflow_continues_when_record_is_valid(workflow_app, mocked_external_services):
    """A valid author record halts the ``author`` workflow without an error message."""
    valid_record = {
        '_collections': ['Authors'],
        'name': {'preferred_name': 'John Smith', 'value': 'Smith, John'},
    }

    workflow_id = build_workflow(valid_record, data_type='authors', id_user=1).id
    author_wf = workflow_object_class.get(workflow_id)

    start('author', object_id=author_wf.id)

    # Reload the object to see the state left by the workflow run.
    author_wf = workflow_object_class.get(author_wf.id)

    assert author_wf.status == ObjectStatus.HALTED
    assert '_error_msg' not in author_wf.extra_data
def test_update_record_goes_through_api_version_of_store_record_without_issue(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """An update of a stored record is sent with PUT to the configured INSPIREHEP_URL."""
    record = record_from_db
    workflow_id = build_workflow(record).id
    expected_control_number = record['control_number']
    expected_head_uuid = str(record.id)

    patched_config = {
        "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
        "INSPIREHEP_URL": "http://web:8000"
    }
    put_response_body = {
        'metadata': {
            'control_number': expected_control_number,
        },
        'id_': expected_head_uuid
    }
    with mock.patch.dict(workflow_app.config, patched_config):
        with requests_mock.Mocker(real_http=True) as requests_mocker:
            # Built inside the patch so the overridden URL is the one used.
            put_url = '{url}/literature/{cn}'.format(
                url=workflow_app.config.get("INSPIREHEP_URL"),
                cn=expected_control_number,
            )
            requests_mocker.register_uri(
                'PUT',
                put_url,
                headers={'content-type': 'application/json'},
                status_code=200,
                json=put_response_body,
            )
            eng_uuid = start("article", object_id=workflow_id)

            seen_paths = []
            seen_hostnames = []
            for sent in requests_mocker.request_history:
                seen_paths.append(sent.path)
                seen_hostnames.append(sent.hostname)

            assert 'web' in seen_hostnames
            assert "/literature/{cn}".format(cn=expected_control_number) in seen_paths

    obj_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
    wf = workflow_object_class.get(obj_id)

    assert wf.data['control_number'] == expected_control_number

    # Matching results and final state stored on the workflow object.
    assert wf.extra_data["holdingpen_matches"] == []
    assert wf.extra_data["previously_rejected"] is False
    assert not wf.extra_data.get("stopped-matched-holdingpen-wf")
    assert wf.extra_data["is-update"]
    assert wf.extra_data["exact-matched"]
    assert wf.extra_data["matches"]["exact"] == [record.get("control_number")]
    assert wf.extra_data["matches"]["approved"] == record.get("control_number")
    assert wf.extra_data["approved"]
    assert wf.status == ObjectStatus.COMPLETED
Exemple #51
0
def test_wf_rejects_automatically_when_previous_matched_wf_was_rejected(
        app, celery_app_with_context, celery_session_worker, generated_record):
    """A second workflow for a record whose first workflow was rejected
    must complete and report the earlier rejection.

    The first workflow is started, waited on until it halts, rejected by
    setting ``extra_data["approved"]`` to ``False`` and continued. A second
    workflow built from the same record is then started, and its
    ``extra_data`` is expected to point back at the first one.
    """
    for flag in (
        'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY',
        'PRODUCTION_MODE',
        'USE_SIGNALS_ON_TIMEOUT',
    ):
        app.config[flag] = False

    # First run: persist the workflow, start it and wait for it to halt.
    first_workflow = build_workflow(generated_record)
    first_workflow.save()
    db.session.commit()
    rejected_id = first_workflow.id

    start.delay('article', object_id=rejected_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(rejected_id, ObjectStatus.HALTED)

    # Reject the halted workflow and let it run to the end.
    halted = workflow_object_class.get(rejected_id)
    halted.extra_data["approved"] = False
    halted.continue_workflow(delayed=True)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(rejected_id, ObjectStatus.COMPLETED)

    rejected = workflow_object_class.get(rejected_id)
    assert rejected.extra_data.get("approved") is False

    # Second run on the very same record.
    second_workflow = build_workflow(generated_record)
    second_workflow.save()
    db.session.commit()
    followup_id = second_workflow.id

    start.delay('article', object_id=followup_id)
    es.indices.refresh("holdingpen-hep")
    check_wf_state(followup_id, ObjectStatus.COMPLETED)

    followup = workflow_object_class.get(followup_id)
    assert followup.extra_data["previously_rejected"] is True
    assert followup.extra_data["previously_rejected_matches"] == [rejected_id]
def restart_workflow(workflow_id, position=None):
    """Set a workflow's callback position and continue it.

    Prints the callback position the workflow had before the change and
    the status of the object returned by ``continue_workflow``.

    Args:
        workflow_id: id passed to ``workflow_object_class.get``.
        position: value stored in the workflow's ``callback_pos``.
            When omitted, a new ``[0]`` list is used.
    """
    if position is None:
        # A fresh list per call: the value ends up stored on the workflow
        # object, so a shared default list could be mutated through it and
        # leak into later calls.
        position = [0]

    wf = workflow_object_class.get(workflow_id)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Workflow {} is currently in position {}'.format(
        workflow_id, wf.callback_pos))
    wf.callback_pos = position
    wf.save()

    db.session.commit()

    res = wf.continue_workflow(delayed=True)
    print('Workflow {} currently in status {}'.format(workflow_id, res.status))
Exemple #53
0
def remove_references(workflow_id):
    """Delete the ``references`` key from a workflow's data and continue it.

    The workflow is loaded by id, its current callback position is printed,
    the key is removed and the change is saved and committed. The workflow
    is then continued with ``start_point='restart_task'`` and the status of
    the returned object is printed.

    Args:
        workflow_id: id passed to ``workflow_object_class.get``.
    """
    workflow = workflow_object_class.get(workflow_id)

    position_report = 'Workflow {} is currently in position {}'.format(
        workflow_id, workflow.callback_pos)
    print(position_report)

    # The key is deleted outright instead of being emptied:
    # an empty list is not schema compliant.
    del workflow.data['references']
    workflow.save()
    db.session.commit()

    outcome = workflow.continue_workflow(
        start_point='restart_task', delayed=True)
    status_report = 'Workflow {} currently in status {}'.format(
        workflow_id, outcome.status)
    print(status_report)
def remove_references(workflow_id):
    """Strip ``references`` from a stored workflow, then continue it.

    Prints the workflow's callback position before the change and the
    status of the object returned by ``continue_workflow`` afterwards.

    Args:
        workflow_id: id passed to ``workflow_object_class.get``.
    """
    message = 'Workflow {} {} {}'
    target = workflow_object_class.get(workflow_id)
    print(message.format(
        workflow_id, 'is currently in position', target.callback_pos))

    # Removing the key rather than assigning []:
    # an empty list is not schema compliant.
    del target.data['references']
    target.save()
    db.session.commit()

    result = target.continue_workflow(start_point='restart_task', delayed=True)
    print(message.format(workflow_id, 'currently in status', result.status))
Exemple #55
0
def restart_workflow(workflow_id, position=None):
    """Move a workflow to the given callback position and continue it.

    The callback position found on the workflow and the status of the
    object returned by ``continue_workflow`` are printed.

    Args:
        workflow_id: id passed to ``workflow_object_class.get``.
        position: value assigned to the workflow's ``callback_pos``.
            Defaults to a newly created ``[0]`` list.
    """
    # The default is created here, not in the signature, so separate calls
    # never share (and cannot corrupt) one list object.
    if position is None:
        position = [0]

    wf = workflow_object_class.get(workflow_id)
    # print(...) with one argument is valid on both Python 2 and 3.
    print('Workflow {} is currently in position {}'.format(workflow_id,
                                                           wf.callback_pos))
    wf.callback_pos = position
    wf.save()

    db.session.commit()

    res = wf.continue_workflow(delayed=True)
    print('Workflow {} currently in status {}'.format(workflow_id,
                                                      res.status))
def test_edit_article_view_sets_user_id(workflow_api_client):
    """The workflow created through the edit_article endpoint must carry
    the id of the logged-in user in ``id_user``."""
    current_user = User.query.filter_by(email='*****@*****.**').one()
    login_user_via_session(workflow_api_client, user=current_user)

    record_factory = TestRecordMetadata.create_from_kwargs(json={})
    recid = record_factory.record_metadata.json['control_number']

    response = workflow_api_client.get(
        "/workflows/edit_article/{}".format(recid))

    # The workflow id is the last path segment of the Location header.
    location = response.headers['Location']
    workflow_id = location.rsplit('/', 1)[-1]
    created_workflow = workflow_object_class.get(workflow_id)

    expected_user_id = int(current_user.get_id())
    assert created_workflow.id_user == expected_user_id
Exemple #57
0
def test_update_author_submit_with_required_fields(api_client):
    """POST an author payload to ``/workflows/authors`` and check the
    workflow object it creates.

    The test expects a 200 response containing a ``workflow_object_id``,
    compares the stored workflow data with ``expected`` and asserts that
    ``extra_data['is-update']`` is ``True``.
    """
    # Payload sent to the endpoint; it already carries a control_number.
    data = {
        "data": {
            "_collections": [
                "Authors"
            ],
            "acquisition_source": {
                "email": "*****@*****.**",
                "datetime": "2019-02-04T10:06:34.695915",
                "method": "submitter",
                "submission_number": "None",
                "internal_uid": 1,
            },
            "name": {
                "value": "Martinez, Diegpo"
            },
            "status": "active",
            "control_number": 3
        }
    }
    response = api_client.post('/workflows/authors', data=json.dumps(data), content_type='application/json')
    assert response.status_code == 200

    workflow_object_id = json.loads(response.data).get('workflow_object_id')
    assert workflow_object_id is not None

    # Load the stored workflow object to inspect what was saved.
    obj = workflow_object_class.get(workflow_object_id)

    expected = {
        "status": "active",
        # NOTE(review): the next line looks damaged, presumably by e-mail/URL
        # redaction: the "$schema" value runs straight into what seems to be
        # the tail of an "acquisition_source" entry, which leaves this literal
        # unbalanced. Restore it from the upstream test before running.
        "$schema": "http://*****:*****@gmail.com",
            "submission_number": "1",
            "datetime": "2019-02-04T10:06:34.695915"
        },
        "_collections": [
            "Authors"
        ],
        "name": {
            "value": "Martinez, Diegpo"
        },
        "control_number": 3
    }

    assert expected == obj.data

    assert obj.extra_data['is-update'] is True
Exemple #58
0
    def _get_wfs_same_source(obj, eng):
        """Return True when a workflow listed under ``extra_data_key``
        has the same acquisition source as ``obj``.

        The ids are read from ``obj.extra_data[extra_data_key]``; a missing
        key counts as an empty list. Sources are compared lower-cased.
        ``eng`` is not used.
        """
        def _lowered_source(data):
            # Lower-cased 'acquisition_source.source' of a record dict.
            return get_value(data, 'acquisition_source.source').lower()

        current_source = _lowered_source(obj.data)
        candidate_ids = obj.extra_data.get(extra_data_key, [])

        # any() stops at the first workflow whose source matches.
        return any(
            _lowered_source(workflow_object_class.get(wf_id).data) == current_source
            for wf_id in candidate_ids
        )
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that a workflow matching a previously rejected one is flagged.

    A first "article" workflow is started for a generated record, rejected
    by setting ``extra_data["approved"]`` to ``False`` and continued until
    its status is COMPLETED. A second workflow is then built from the same
    record with a different title, and its ``extra_data`` is expected to
    report ``previously_rejected`` together with the id of the first
    workflow object.
    """
    record = generate_record()

    # First workflow: run it, reject it and let it finish.
    record_workflow = build_workflow(record).id
    eng_uuid = start("article", object_id=record_workflow)
    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj_id = eng.objects[0].id
    obj = workflow_object_class.get(obj_id)
    obj.extra_data["approved"] = False  # reject record
    obj.continue_workflow()
    # Fetch the object again to assert on its state after continuing.
    obj = workflow_object_class.get(obj_id)
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data.get("approved") is False

    # Refresh the holdingpen index before starting the second workflow.
    es.indices.refresh("holdingpen-hep")

    record["titles"][0][
        "title"
    ] = "This is an update that will match the wf in the holdingpen"
    # this workflow matches in the holdingpen and stops because the
    # matched one was rejected
    workflow_id = build_workflow(record).id
    eng_uuid = start("article", object_id=workflow_id)
    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj2 = eng.objects[0]

    assert obj2.extra_data["previously_rejected"] is True
    assert obj2.extra_data["previously_rejected_matches"] == [obj_id]