Exemple #1
0
def test_match_references_finds_match_when_repeated_record_with_different_scores(
        mocked_inspire_matcher_match, inspire_app):
    """A single JHEP reference ends up linked to literature record 1.

    The matcher results come from the ``mocked_inspire_matcher_match``
    fixture, which is defined outside this snippet.
    """
    publication_info = {
        "artid": "045",
        "journal_title": "JHEP",
        "journal_volume": "06",
        "page_start": "045",
        "year": 2007,
    }
    input_references = [{"reference": {"publication_info": publication_info}}]

    references_schema = load_schema("hep")["properties"]["references"]

    # The input must be schema-valid before matching is attempted.
    assert validate(input_references, references_schema) is None

    outcome = match_references(input_references)
    matched = outcome["matched_references"]

    assert len(matched) == 1
    expected_ref = "http://localhost:5000/api/literature/1"
    assert matched[0]["record"]["$ref"] == expected_ref
    # Matching must not break schema validity.
    assert validate(matched, references_schema) is None

    assert outcome["any_link_modified"]
    assert outcome["added_recids"] == [1]
    assert outcome["removed_recids"] == []
Exemple #2
0
def refextract_url():
    """Run refextract on a URL.

    Reads ``url`` from the JSON request body. When the
    ``FEATURE_FLAG_ENABLE_REFEXTRACT_SERVICE`` config flag is set, the
    extraction is delegated to the HTTP service configured under
    ``REFEXTRACT_SERVICE_URL``; otherwise ``extract_references_from_url`` is
    called in-process. The extracted references are deduplicated, mapped to
    the schema and matched.

    Returns:
        A JSON response with the matched references, or a
        ``(response, 500)`` tuple carrying an error message when the
        refextract service answers with a non-200 status.
    """
    if current_app.config.get("FEATURE_FLAG_ENABLE_REFEXTRACT_SERVICE"):
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        data = {
            "journal_kb_data": create_journal_dict(),
            "url": request.json["url"]
        }
        response = requests.post(
            f"{current_app.config['REFEXTRACT_SERVICE_URL']}/extract_references_from_url",
            headers=headers,
            data=orjson.dumps(data),
        )
        if response.status_code != 200:
            # The status code must be returned next to the response, not
            # passed into jsonify(): as a jsonify argument it would be
            # serialised into the body and the reply would go out as 200.
            return jsonify({"message": "Can not extract references"}), 500
        extracted_references = response.json()["extracted_references"]
    else:
        extracted_references = extract_references_from_url(
            request.json["url"],
            override_kbs_files={"journals": create_journal_dict()},
            reference_format="{title},{volume},{page}",
        )
    deduplicated_extracted_references = dedupe_list(extracted_references)
    references = map_refextract_to_schema(deduplicated_extracted_references)
    match_result = match_references(references)
    return jsonify(match_result.get("matched_references"))
Exemple #3
0
def refextract_url():
    """Extract references from the requested URL and return the matched ones.

    Reads ``url`` from the JSON request body and passes the journal
    dictionary from ``create_journal_dict`` as the ``journals`` knowledge base.
    """
    source_url = request.json["url"]
    knowledge_bases = {"journals": create_journal_dict()}
    raw_references = extract_references_from_url(
        source_url,
        override_kbs_files=knowledge_bases,
        reference_format="{title},{volume},{page}",
    )
    outcome = match_references(map_refextract_to_schema(raw_references))
    return jsonify(outcome.get("matched_references"))
Exemple #4
0
def refextract_url():
    """Extract references from the requested URL and return the matched ones.

    Reads ``url`` from the JSON request body. The knowledge-base path handed
    to refextract is provided by the ``local_refextract_kbs_path`` context
    manager and is only used while that context is open.
    """
    with local_refextract_kbs_path() as kbs_path:
        source_url = request.json["url"]
        raw_references = extract_references_from_url(
            source_url,
            override_kbs_files=kbs_path,
            reference_format="{title},{volume},{page}",
        )
    # Mapping and matching happen after the context has been exited.
    schema_references = map_refextract_to_schema(raw_references)
    outcome = match_references(schema_references)
    return jsonify(outcome.get("matched_references"))
Exemple #5
0
def refextract_text():
    """Extract references from the requested text and return the match result.

    Reads ``text`` from the JSON request body. The whole value returned by
    ``match_references`` is serialised as the response.
    """
    with local_refextract_kbs_path() as kbs_path:
        raw_text = request.json["text"]
        raw_references = extract_references_from_string(
            raw_text,
            override_kbs_files=kbs_path,
            reference_format="{title},{volume},{page}",
        )
    # Mapping and matching happen after the context has been exited.
    schema_references = map_refextract_to_schema(raw_references)
    return jsonify(match_references(schema_references))
Exemple #6
0
def test_match_references_doesnt_use_relaxed_title_matching(inspire_app):
    """The reference is linked to record 2 via its texkey.

    Record 1 is published in "Phys. Rev. B." while the reference cites
    "Phys. Rev." with the same volume and page; the expected link is to
    record 2, which carries the cited texkey.
    """
    shared_titles = "The Strongly-Interacting Light Higgs"

    record_with_pub_info = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 1,
        "document_type": ["article"],
        "publication_info": [
            {
                "artid": "101",
                "journal_title": "Phys. Rev. B.",
                "journal_volume": "100",
                "page_start": "100",
                "year": 2020,
            }
        ],
        "titles": [{"title": shared_titles}],
    }
    create_record("lit", record_with_pub_info)

    record_with_texkey = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 2,
        "document_type": ["article"],
        "texkeys": ["Shaikh:2022ynt"],
        "titles": [{"title": shared_titles}],
    }
    create_record("lit", record_with_texkey)

    cited_publication = {
        "journal_title": "Phys. Rev.",
        "journal_volume": "100",
        "page_start": "100",
    }
    input_references = [
        {
            "reference": {
                "publication_info": cited_publication,
                "texkey": "Shaikh:2022ynt",
            }
        }
    ]

    outcome = match_references(input_references)
    linked_record = outcome["matched_references"][0]["record"]

    assert {"$ref": "http://localhost:5000/api/literature/2"} == linked_record
Exemple #7
0
def test_match_references_no_match_when_multiple_match_different_from_previous(
    inspire_app, ):
    """No record is linked when the cited erratum exists on two records.

    Both created records list the same JHEP 05 (2017) 074 erratum, and the
    only reference cites exactly that publication info, so the reference is
    expected to stay without a ``record`` link.
    """
    record_with_erratum = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 1,
        "document_type": ["article"],
        "publication_info": [
            {
                "artid": "159",
                "journal_title": "JHEP",
                "journal_volume": "03",
                "page_start": "159",
                "year": 2016,
            },
            {
                "artid": "074",
                "journal_title": "JHEP",
                "journal_volume": "05",
                "material": "erratum",
                "page_start": "074",
                "year": 2017,
            },
        ],
    }
    erratum_only_record = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 2,
        "document_type": ["article"],
        "publication_info": [
            {
                "artid": "074",
                "journal_title": "JHEP",
                "journal_volume": "05",
                "material": "erratum",
                "page_start": "074",
                "year": 2017,
            }
        ],
    }
    for record_json in (record_with_erratum, erratum_only_record):
        create_record("lit", data=record_json)

    cited_publication = {
        "artid": "074",
        "journal_title": "JHEP",
        "journal_volume": "05",
        "page_start": "074",
        "year": 2017,
    }
    input_references = [{"reference": {"publication_info": cited_publication}}]

    references_schema = load_schema("hep")["properties"]["references"]
    assert validate(input_references, references_schema) is None

    matched = match_references(input_references)

    assert get_value(matched[0], "record") is None
    assert validate(matched, references_schema) is None
Exemple #8
0
def test_match_references_matches_when_multiple_match_if_same_as_previous(
        inspire_app):
    """The erratum reference is linked to record 1, like the one before it.

    Two records list the same JHEP 05 (2017) 074 erratum. The first
    reference cites record 1's own publication, the second cites the
    erratum, and the second is expected to be linked to record 1.
    """
    record_with_erratum = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 1,
        "document_type": ["article"],
        "publication_info": [
            {
                "artid": "159",
                "journal_title": "JHEP",
                "journal_volume": "03",
                "page_start": "159",
                "year": 2016,
            },
            {
                "artid": "074",
                "journal_title": "JHEP",
                "journal_volume": "05",
                "material": "erratum",
                "page_start": "074",
                "year": 2017,
            },
        ],
    }
    erratum_only_record = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 2,
        "document_type": ["article"],
        "publication_info": [
            {
                "artid": "074",
                "journal_title": "JHEP",
                "journal_volume": "05",
                "material": "erratum",
                "page_start": "074",
                "year": 2017,
            }
        ],
    }
    for record_json in (record_with_erratum, erratum_only_record):
        create_record("lit", data=record_json)

    original_publication = {
        "artid": "159",
        "journal_title": "JHEP",
        "journal_volume": "03",
        "page_start": "159",
        "year": 2016,
    }
    erratum_publication = {
        "artid": "074",
        "journal_title": "JHEP",
        "journal_volume": "05",
        "page_start": "074",
        "year": 2017,
    }
    input_references = [
        {"reference": {"publication_info": original_publication}},
        {"reference": {"publication_info": erratum_publication}},
    ]

    references_schema = load_schema("hep")["properties"]["references"]
    assert validate(input_references, references_schema) is None

    outcome = match_references(input_references)
    linked = outcome["matched_references"]

    expected_ref = "http://localhost:5000/api/literature/1"
    assert linked[1]["record"]["$ref"] == expected_ref
    assert validate(linked, references_schema) is None

    assert outcome["any_link_modified"]
    assert outcome["added_recids"] == [1, 1]
    assert outcome["removed_recids"] == []
Exemple #9
0
def get_linked_refs():
    """Match the references from the request body and return them.

    Reads ``references`` from the JSON request body and responds with a JSON
    object whose ``references`` key holds the ``matched_references`` entry of
    the match result.
    """
    payload = request.json
    outcome = match_references(payload["references"])
    linked_references = outcome.get("matched_references")
    return jsonify({"references": linked_references})