예제 #1
0
def test_displacy_parse_spans_with_kb_id_options(en_vocab):
    """Check that parse_spans reports each span's kb_id and a templated kb_url.

    Two overlapping spans carrying knowledge-base ids are stored under the
    "sc" key, and a ``kb_url_template`` option is supplied. Each parsed span
    entry is expected to hold its ``kb_id`` and a ``kb_url`` in which the
    template placeholder has been replaced by that id.
    """
    words = ["Welcome", "to", "the", "Bank", "of", "China"]
    doc = Doc(en_vocab, words=words)
    org_span = Span(doc, 3, 6, "ORG", kb_id="Q790068")
    gpe_span = Span(doc, 5, 6, "GPE", kb_id="Q148")
    doc.spans["sc"] = [org_span, gpe_span]
    options = {"kb_url_template": "https://wikidata.org/wiki/{}"}

    parsed = displacy.parse_spans(doc, options=options)

    # "start"/"end" line up with character positions in the expected text
    # ("Bank" begins at 15, "China" at 23); the *_token values mirror the
    # token boundaries the spans were created with.
    expected_org = {
        "start": 15,
        "end": 28,
        "start_token": 3,
        "end_token": 6,
        "label": "ORG",
        "kb_id": "Q790068",
        "kb_url": "https://wikidata.org/wiki/Q790068",
    }
    expected_gpe = {
        "start": 23,
        "end": 28,
        "start_token": 5,
        "end_token": 6,
        "label": "GPE",
        "kb_id": "Q148",
        "kb_url": "https://wikidata.org/wiki/Q148",
    }

    assert isinstance(parsed, dict)
    assert parsed["text"] == "Welcome to the Bank of China "
    assert parsed["spans"] == [expected_org, expected_gpe]
예제 #2
0
def test_displacy_parse_spans(en_vocab):
    """Check that parse_spans converts a Doc's spans into displaCy's format.

    No options are passed and the spans carry no knowledge-base id, so every
    entry is expected to have an empty ``kb_id`` and ``"#"`` as ``kb_url``.
    """
    words = ["Welcome", "to", "the", "Bank", "of", "China"]
    doc = Doc(en_vocab, words=words)
    org_span = Span(doc, 3, 6, "ORG")
    gpe_span = Span(doc, 5, 6, "GPE")
    doc.spans["sc"] = [org_span, gpe_span]

    parsed = displacy.parse_spans(doc)

    # "start"/"end" line up with character positions in the expected text
    # ("Bank" begins at 15, "China" at 23); the *_token values mirror the
    # token boundaries the spans were created with.
    expected_org = {
        "start": 15,
        "end": 28,
        "start_token": 3,
        "end_token": 6,
        "label": "ORG",
        "kb_id": "",
        "kb_url": "#",
    }
    expected_gpe = {
        "start": 23,
        "end": 28,
        "start_token": 5,
        "end_token": 6,
        "label": "GPE",
        "kb_id": "",
        "kb_url": "#",
    }

    assert isinstance(parsed, dict)
    assert parsed["text"] == "Welcome to the Bank of China "
    assert parsed["spans"] == [expected_org, expected_gpe]
예제 #3
0
def test_displacy_parse_spans_different_spans_key(en_vocab):
    """Check that parse_spans reads spans from the key named in ``spans_key``.

    Spans are stored under both "sc" and "custom"; with ``spans_key`` set to
    "custom", only the single "BANK" span from that group is expected in the
    parsed output.
    """
    words = ["Welcome", "to", "the", "Bank", "of", "China"]
    doc = Doc(en_vocab, words=words)
    # These spans live under a different key and must not show up in the result.
    doc.spans["sc"] = [Span(doc, 3, 6, "ORG"), Span(doc, 5, 6, "GPE")]
    bank_span = Span(doc, 3, 6, "BANK")
    doc.spans["custom"] = [bank_span]
    options = {"spans_key": "custom"}

    parsed = displacy.parse_spans(doc, options=options)

    expected_bank = {
        "start": 15,
        "end": 28,
        "start_token": 3,
        "end_token": 6,
        "label": "BANK",
        "kb_id": "",
        "kb_url": "#",
    }

    assert isinstance(parsed, dict)
    assert parsed["text"] == "Welcome to the Bank of China "
    assert parsed["spans"] == [expected_bank]