Ejemplo n.º 1
0
def test_revert_revision_works_correctly_and_runs_update(inspire_app):
    """Check that reverting a record restores its reference/citation links.

    A citing record is created with a single reference pointing at a cited
    record. The reference is then removed through ``update`` and finally the
    citing record is reverted to revision 2. After every step the test checks
    the number of references on the citing record, the number of citations on
    the cited record and the resulting revision id.
    """
    hep_schema = "http://localhost:5000/schemas/records/hep.json"

    cited_record = LiteratureRecord.create(
        {
            "$schema": hep_schema,
            "titles": [{"title": "Test a valid record"}],
            "document_type": ["article"],
            "_collections": ["Literature"],
        }
    )

    cited_ref = (
        f"http://localhost:5000/api/literature/{cited_record['control_number']}"
    )
    citing_record = LiteratureRecord.create(
        {
            "$schema": hep_schema,
            "titles": [{"title": "My title"}],
            "document_type": ["article"],
            "_collections": ["Literature"],
            "preprint_date": "2019-06-28",
            "references": [{"record": {"$ref": cited_ref}}],
        }
    )
    db.session.commit()

    def assert_link_count(citing, expected):
        # The citing side (references) and the cited side (citations) are
        # always checked together and must hold the same number of entries.
        assert len(citing.model.references) == expected
        assert len(cited_record.model.citations) == expected

    assert_link_count(citing_record, 1)
    assert citing_record.revision_id == 2

    # Drop the reference and persist the change.
    citing_record = LiteratureRecord.get_record(citing_record.id)
    stripped_data = dict(citing_record)
    stripped_data.pop("references")
    citing_record.update(stripped_data)
    db.session.commit()

    citing_record = LiteratureRecord.get_record(citing_record.id)
    assert_link_count(citing_record, 0)
    assert citing_record.revision_id == 4

    # Go back to the revision that still contained the reference.
    citing_record.revert(2)
    db.session.commit()

    citing_record = LiteratureRecord.get_record(citing_record.id)
    assert_link_count(citing_record, 1)

    # The content of revision 2 is stored as a brand new revision rather than
    # rewinding the history, so the revision id moves forward to 6.
    assert citing_record.revision_id == 6
    assert dict(citing_record.revisions[2]) == dict(citing_record)
Ejemplo n.º 2
0
def test_match_references(inspire_app, cli, celery_app_with_context,
                          celery_session_worker):
    """Run the ``match references`` CLI command and check every citer is linked.

    One record carrying a DOI is created and indexed, then three records
    whose single reference carries the same DOI. After the command exits
    successfully, the first reference of each citer must point at the cited
    record.
    """
    doi = "10.1371/journal.pone.0188398"

    cited_record = create_record_async(
        "lit",
        data={"document_type": ["article"], "dois": [{"value": doi}]},
    )
    # The reference matcher requires the cited record to be indexed.
    cited_record.index(delay=False)

    citer_data = {"references": [{"reference": {"dois": [doi]}}]}
    citers = [create_record_async("lit", data=citer_data) for _ in range(3)]
    citer_ids = [citer.id for citer in citers]

    # A non-literature record is created alongside the citers; presumably it
    # verifies the command copes with other record types -- TODO confirm.
    create_record_async("dat")

    # NOTE(review): "-bs 2" looks like a batch size smaller than the number
    # of citers, forcing more than one batch -- confirm against the CLI.
    result = cli.invoke(["match", "references", "-bs", 2])

    assert result.exit_code == 0

    for citer_id in citer_ids:
        refreshed_citer = LiteratureRecord.get_record(citer_id)
        matched_record = get_value(refreshed_citer, "references[0].record")
        assert matched_record == cited_record["self"]
Ejemplo n.º 3
0
def test_match_references_by_uuids_dedupes_references_after_matching(
        inspire_app):
    """Check that matching collapses references that become identical.

    The citer starts with two references sharing the same report number but
    carrying different ``record`` links. After ``match_references_by_uuids``
    runs, the stored record is expected to hold a single reference with no
    ``record`` link.
    """
    ambiguous_references = [
        {
            "reference": {"report_numbers": ["AMBIGUOUS-42"]},
            "record": {"$ref": "https://inspirehep.net/api/literature/1234"},
        },
        {
            "reference": {"report_numbers": ["AMBIGUOUS-42"]},
            "record": {"$ref": "https://inspirehep.net/api/literature/5678"},
        },
    ]
    citer = create_record("lit", data={"references": ambiguous_references})

    match_references_by_uuids([str(citer.id)])

    refreshed_citer = LiteratureRecord.get_record(citer.id)
    assert refreshed_citer["references"] == [
        {"reference": {"report_numbers": ["AMBIGUOUS-42"]}}
    ]
Ejemplo n.º 4
0
def test_match_references_by_uuids(inspire_app):
    """Check that only the requested, non-deleted records get matched.

    Four records are prepared around one cited record: a citer that is passed
    to the matcher, an identical citer that is not passed, a deleted citer
    that is passed, and a record created without explicit data. Only the
    first one is expected to end up with a ``record`` link on its reference.
    """
    doi = "10.1371/journal.pone.0188398"

    cited_record = create_record(
        "lit",
        data={"document_type": ["article"], "dois": [{"value": doi}]},
    )

    citer_data = {"references": [{"reference": {"dois": [doi]}}]}
    citer_record = create_record("lit", data=citer_data)
    # Same content as the citer, but its uuid is never handed to the matcher.
    excluded_citer_record = create_record("lit", data=citer_data)

    # "deleted" goes first so the payload keeps the original key order.
    deleted_data = {"deleted": True}
    deleted_data.update(citer_data)
    deleted_record = create_record(
        "lit", data=deleted_data, with_control_number=True
    )
    record_without_references = create_record("lit")

    requested = (citer_record, record_without_references, deleted_record)
    match_references_by_uuids([str(record.id) for record in requested])

    updated_citer_record = LiteratureRecord.get_record(citer_record.id)
    excluded_citer_record = LiteratureRecord.get_record(
        excluded_citer_record.id
    )
    # A plain lookup must not return the deleted record ...
    with pytest.raises(NoResultFound):
        LiteratureRecord.get_record(deleted_record.id)
    # ... so it has to be fetched explicitly with ``with_deleted=True``.
    deleted_record = LiteratureRecord.get_record(
        deleted_record.id, with_deleted=True
    )

    matched_record = get_value(updated_citer_record, "references[0].record")
    assert matched_record == cited_record["self"]
    assert "record" not in get_value(excluded_citer_record, "references[0]")
    assert "record" not in get_value(deleted_record, "references[0]")
Ejemplo n.º 5
0
def test_match_references_by_uuids(inspire_app):
    """Check that matching by uuid only touches the records that were passed.

    Two identical citers reference a cited record by DOI, but only one of
    them (plus a record created without explicit data) is handed to
    ``match_references_by_uuids``. The passed citer must gain a ``record``
    link to the cited record; the other one must stay unlinked.
    """
    doi = "10.1371/journal.pone.0188398"

    cited_record = create_record(
        "lit",
        data={"document_type": ["article"], "dois": [{"value": doi}]},
    )

    citer_data = {"references": [{"reference": {"dois": [doi]}}]}
    citer_record = create_record("lit", data=citer_data)
    # Same content as the citer, but its uuid is never handed to the matcher.
    excluded_citer_record = create_record("lit", data=citer_data)

    record_without_references = create_record("lit")

    requested = (citer_record, record_without_references)
    match_references_by_uuids([str(record.id) for record in requested])

    updated_citer_record = LiteratureRecord.get_record(citer_record.id)
    excluded_citer_record = LiteratureRecord.get_record(
        excluded_citer_record.id
    )

    matched_record = get_value(updated_citer_record, "references[0].record")
    assert matched_record == cited_record["self"]
    assert "record" not in get_value(excluded_citer_record, "references[0]")
Ejemplo n.º 6
0
def test_match_references(inspire_app, cli, clean_celery_session):
    """Run the ``match references`` CLI command once all records are indexed.

    One record carrying a DOI is created and indexed, then three records
    whose single reference carries the same DOI, plus one data record. The
    test waits until the citers and the data record show up in their search
    indices, invokes the command and checks that the first reference of each
    citer points at the cited record.
    """
    doi = "10.1371/journal.pone.0188398"

    cited_record = create_record_async(
        "lit",
        data={"document_type": ["article"], "dois": [{"value": doi}]},
    )
    # The reference matcher requires the cited record to be indexed.
    cited_record.index(delay=False)

    citer_data = {"references": [{"reference": {"dois": [doi]}}]}
    citers = [create_record_async("lit", data=citer_data) for _ in range(3)]
    citer_ids = [citer.id for citer in citers]

    data_record_id = create_record_async("dat").id

    def assert_all_records_are_indexed():
        # Make pending index operations visible before searching.
        current_search.flush_and_refresh("*")

        indexed_hep_ids = get_value(es_search("records-hep"), "hits.hits._id")
        for citer_id in citer_ids:
            assert str(citer_id) in indexed_hep_ids

        indexed_data_ids = get_value(
            es_search("records-data"), "hits.hits._id"
        )
        assert str(data_record_id) in indexed_data_ids

    retry_until_pass(assert_all_records_are_indexed)

    # NOTE(review): "-bs 2" looks like a batch size smaller than the number
    # of citers, forcing more than one batch -- confirm against the CLI.
    result = cli.invoke(["match", "references", "-bs", 2])

    assert result.exit_code == 0

    for citer_id in citer_ids:
        refreshed_citer = LiteratureRecord.get_record(citer_id)
        matched_record = get_value(refreshed_citer, "references[0].record")
        assert matched_record == cited_record["self"]
Ejemplo n.º 7
0
def test_redirect_and_delete_record_from_deleted_records_field(inspire_app):
    """Check that listing a record in ``deleted_records`` redirects and deletes it.

    After a record is updated so that ``deleted_records`` references another
    record, the other record's control-number PID must be redirected, looking
    it up by PID value must return the updated record, and the original must
    be flagged as deleted.
    """
    record_to_delete = create_record("lit")
    record = create_record("lit")

    # Same payload as the existing record plus the new ``deleted_records``.
    record.update(dict(record, deleted_records=[record_to_delete["self"]]))

    assert len(record["deleted_records"]) == 1

    redirected_pid = record_to_delete.control_number_pid
    assert redirected_pid.is_redirected()

    # Resolving the old control number now lands on the surviving record.
    resolved_record = LiteratureRecord.get_record_by_pid_value(
        record_to_delete.control_number
    )
    assert resolved_record.id == record.id

    # The original is only reachable when deleted records are requested.
    original_record = LiteratureRecord.get_record(
        record_to_delete.id, with_deleted=True
    )
    assert original_record["deleted"] is True
Ejemplo n.º 8
0
def test_fulltext_indexer_updates_documents_when_record_changed(
        inspire_app, clean_celery_session, override_config):
    """Check that replacing a record's document refreshes the indexed attachment.

    With ``FEATURE_FLAG_ENABLE_FULLTEXT`` switched on, a literature record
    with one fulltext document is created and the test waits until the
    indexed document carries an ``attachment``. The document is then replaced
    and the test waits until the index shows the new ``key`` together with an
    ``attachment`` that differs from the first one.
    """
    with override_config(FEATURE_FLAG_ENABLE_FULLTEXT=True):

        def first_es_hit():
            # ``record`` is resolved at call time, so the helper keeps working
            # after the record is re-fetched further down.
            search = LiteratureSearch().get_record(str(record.id))
            return search.execute().hits.hits[0]

        data = faker.record("lit")
        data.update(
            {
                "arxiv_eprints": [
                    {"categories": ["hep-ph"], "value": "hep-ph/9404247"}
                ],
                "documents": [
                    {
                        "source": "arxiv",
                        "fulltext": True,
                        "filename": "arXiv:nucl-th_9310030.pdf",
                        "key": "arXiv:nucl-th_9310030.pdf",
                        "url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
                    }
                ],
            }
        )
        record = LiteratureRecord.create(data)
        db.session.commit()

        def assert_record_in_es():
            current_search.flush_and_refresh("*")
            record_lit_es = first_es_hit()
            assert "attachment" in record_lit_es._source["documents"][0]

        retry_until_pass(assert_record_in_es, timeout=90, retry_interval=20)

        # Keep the attachment of the first document so the update can be
        # compared against it later.
        first_indexed_document = first_es_hit()["_source"]["documents"][0]
        record_first_attachment = first_indexed_document["attachment"]
        db.session.expire_all()

        record = LiteratureRecord.get_record(record.id)
        record["documents"] = [
            {
                "source": "arxiv",
                "fulltext": True,
                "filename": "new_doc.pdf",
                "key": "new_doc.pdf",
                "url": "http://www.africau.edu/images/default/sample.pdf",
            }
        ]
        record.update(dict(record))
        db.session.commit()

        def assert_update_in_es():
            current_search.flush_and_refresh("*")
            indexed_document = first_es_hit()._source["documents"][0]
            assert "new_doc.pdf" == indexed_document["key"]
            assert record_first_attachment != indexed_document["attachment"]

        retry_until_pass(assert_update_in_es, timeout=90, retry_interval=20)