Beispiel #1
0
def candidate(text):
    """Return the ``name`` of every candidate Spotlight reports for *text*.

    Queries the candidates endpoint on ``localhost:2222`` with a
    confidence of 0.30, a support of 30 and the ``'Default'`` spotter,
    then collects ``item.get('name')`` for each returned item.
    """
    endpoint = 'http://localhost:2222/rest/candidates'
    spotted = spotlight.candidates(endpoint, text, confidence=0.30,
                                   support=30, spotter='Default')
    names = []
    for surface_form in spotted:
        names.append(surface_form.get('name'))
    return names
Beispiel #2
0
def get_candidates(text):
    """Look up Spotlight candidates for *text*.

    The endpoint and thresholds come from ``candidates_host``,
    ``confidence_level`` and ``support_level``, which are defined outside
    this function.  Returns whatever ``spotlight.candidates`` returns, or
    the string ``"No candidates"`` when it raises
    ``spotlight.SpotlightException``.
    """
    try:
        found = spotlight.candidates(
            candidates_host, text,
            confidence=confidence_level, support=support_level,
        )
    except spotlight.SpotlightException:
        return "No candidates"
    return found
Beispiel #3
0
def dbpedia_spotlight(doc, lang='en', conf=0.5, supp=0, api_url=None):
    """Run text through a DBpedia Spotlight instance.

    Calls the DBpedia Spotlight instance to perform entity linking and
    returns the names/links it has found.

    See http://spotlight.dbpedia.org/ for details.
    This task uses a Python client for DBp Spotlight:
    https://github.com/aolieman/pyspotlight

    Parameters
    ----------
    doc
        Passed to ``fetch`` to obtain the text to annotate.
    lang
        Language code used to pick the server port when ``api_url`` is
        None.
    conf, supp
        Forwarded to ``spotlight.candidates`` as ``confidence`` and
        ``support``.
    api_url
        Base REST URL; ``"/candidates"`` is appended to it.  When None,
        the URL is built from ``lang``.

    Returns
    -------
    A list of the annotations that contain a ``'resource'`` key, with that
    value always wrapped in a list.  If the client raises
    ``SpotlightException`` or ``TypeError``, a dict ``{'error': message}``
    is returned instead.

    Raises
    ------
    ValueError
        If ``api_url`` is None and ``lang`` is not a known language code.
    """

    if api_url is None:
        server = "http://spotlight.sztaki.hu"

        ports_by_language = {
            'en': 2222,
            'fr': 2225,
            'de': 2226,
            'ru': 2227,
            'pt': 2228,
            'hu': 2229,
            'it': 2230,
            'es': 2231,
            'nl': 2232,
            'tr': 2235
        }
        if lang not in ports_by_language:
            raise ValueError("Not a valid language code: %r" % lang)

        api_url = server + ':' + str(ports_by_language[lang]) + '/rest'

    api_url += "/candidates"

    text = fetch(doc)

    try:
        spotlight_resp = spotlight.candidates(api_url,
                                              text,
                                              confidence=conf,
                                              support=supp,
                                              spotter='Default')
    except (spotlight.SpotlightException, TypeError) as e:
        # Python 3 exceptions have no ``message`` attribute; fall back to
        # str(e) so this handler cannot itself raise AttributeError.
        return {'error': getattr(e, 'message', None) or str(e)}

    def ensure_resource_list(annotation):
        # A single candidate is not wrapped in a list; normalise it.
        if not isinstance(annotation[u'resource'], list):
            annotation[u'resource'] = [annotation[u'resource']]
        return annotation

    # Annotations without a 'resource' key are dropped.
    annotations = [
        ensure_resource_list(annot) for annot in spotlight_resp
        if u'resource' in annot
    ]

    return annotations
Beispiel #4
0
def dbpedia_spotlight(doc, lang='en', conf=0.5, supp=0, api_url=None):
    """Run text through a DBpedia Spotlight instance.

    Calls the DBpedia Spotlight instance to perform entity linking and
    returns the names/links it has found.

    See http://spotlight.dbpedia.org/ for details.
    This task uses a Python client for DBp Spotlight:
    https://github.com/aolieman/pyspotlight

    Parameters
    ----------
    doc
        Passed to ``fetch`` to obtain the text to annotate.
    lang
        Language code used to pick the server port when ``api_url`` is
        None.
    conf, supp
        Forwarded to ``spotlight.candidates`` as ``confidence`` and
        ``support``.
    api_url
        Base REST URL; ``"/candidates"`` is appended to it.  When None,
        the URL is built from ``lang``.

    Returns
    -------
    A list of the annotations that contain a ``'resource'`` key, with that
    value always wrapped in a list.  If the client raises
    ``SpotlightException`` or ``TypeError``, a dict ``{'error': message}``
    is returned instead.

    Raises
    ------
    ValueError
        If ``api_url`` is None and ``lang`` is not a known language code.
    """

    if api_url is None:
        server = "http://spotlight.sztaki.hu"

        ports_by_language = {
            'en': 2222,
            'fr': 2225,
            'de': 2226,
            'ru': 2227,
            'pt': 2228,
            'hu': 2229,
            'it': 2230,
            'es': 2231,
            'nl': 2232,
            'tr': 2235
        }
        if lang not in ports_by_language:
            raise ValueError("Not a valid language code: %r" % lang)

        api_url = server + ':' + str(ports_by_language[lang]) + '/rest'

    api_url += "/candidates"

    text = fetch(doc)

    try:
        spotlight_resp = spotlight.candidates(
            api_url, text,
            confidence=conf,
            support=supp,
            spotter='Default'
        )
    except (spotlight.SpotlightException, TypeError) as e:
        # ``e.message`` only exists on Python 2; use str(e) when it is
        # missing or empty so the error path works on Python 3 as well.
        return {'error': getattr(e, 'message', None) or str(e)}

    def ensure_resource_list(annotation):
        # A single candidate is not wrapped in a list; normalise it.
        if not isinstance(annotation[u'resource'], list):
            annotation[u'resource'] = [annotation[u'resource']]
        return annotation

    # Annotations without a 'resource' key are dropped.
    annotations = [ensure_resource_list(annot)
                   for annot in spotlight_resp if u'resource' in annot]

    return annotations
Beispiel #5
0
def dbpedia_spotlight(doc, lang='en', conf=0.5, supp=0, api_url=None):
    """Run text through a DBpedia Spotlight instance.

    Calls the DBpedia Spotlight instance to perform entity linking and
    returns the names/links it has found.

    See http://spotlight.dbpedia.org/ for details.
    This task uses a Python client for DBp Spotlight:
    https://github.com/aolieman/pyspotlight

    Parameters
    ----------
    doc
        Passed to ``fetch`` to obtain the text to annotate.
    lang
        Language code used to pick a default endpoint when no ``api_url``
        is given.
    conf, supp
        Forwarded to ``spotlight.candidates`` as ``confidence`` and
        ``support``.
    api_url
        Base REST URL; ``"/candidates"`` is appended to it.  When None or
        empty, the default endpoint for ``lang`` is used.

    Returns
    -------
    A list of the annotations that contain a ``'resource'`` key; a single
    resource dict is wrapped in a list.  If the client raises
    ``SpotlightException`` or ``TypeError``, a dict ``{'error': message}``
    is returned instead.

    Raises
    ------
    ValueError
        If no ``api_url`` is given and ``lang`` is not a known language
        code.
    """
    text = fetch(doc)

    endpoints_by_language = {
        'en': "http://spotlight.sztaki.hu:2222/rest",
        'de': "http://spotlight.sztaki.hu:2226/rest",
        'nl': "http://spotlight.sztaki.hu:2232/rest",
        'fr': "http://spotlight.sztaki.hu:2225/rest",
        'it': "http://spotlight.sztaki.hu:2230/rest",
        'ru': "http://spotlight.sztaki.hu:2227/rest",
        'es': "http://spotlight.sztaki.hu:2231/rest",
        'pt': "http://spotlight.sztaki.hu:2228/rest",
        'hu': "http://spotlight.sztaki.hu:2229/rest",
        'tr': "http://spotlight.sztaki.hu:2235/rest"
    }

    # Treat every falsy api_url (None or "") as "not supplied".  Checking
    # validity with ``not api_url`` but selecting the default only for
    # ``is None`` would let an empty string through as the bare URL
    # "/candidates".
    if not api_url:
        if lang not in endpoints_by_language:
            raise ValueError("Not a valid language code: %r" % lang)
        api_url = endpoints_by_language[lang]

    api_url += "/candidates"

    try:
        spotlight_resp = spotlight.candidates(
            api_url, text,
            confidence=conf,
            support=supp,
            spotter='Default'
        )
    except (spotlight.SpotlightException, TypeError) as e:
        # Python 3 exceptions have no ``message`` attribute; fall back to
        # str(e) so this handler cannot itself raise AttributeError.
        return {'error': getattr(e, 'message', None) or str(e)}

    # Return a list of annotation dictionaries
    annotations = []
    for annotation in spotlight_resp:
        # Ignore annotations without disambiguation candidates
        if u'resource' in annotation:
            # Always return a list of resources, also for single candidates
            if isinstance(annotation[u'resource'], dict):
                annotation[u'resource'] = [annotation[u'resource']]
            annotations.append(annotation)

    return annotations
Beispiel #6
0
def dbpedia_spotlight(doc, lang='en', conf=0.5, supp=0, api_url=None):
    """Run text through a DBpedia Spotlight instance.

    Calls the DBpedia Spotlight instance to perform entity linking and
    returns the names/links it has found.

    See http://spotlight.dbpedia.org/ for details.
    This task uses a Python client for DBp Spotlight:
    https://github.com/aolieman/pyspotlight

    Parameters
    ----------
    doc
        Passed to ``fetch`` to obtain the text to annotate.
    lang
        Language code used to pick a default endpoint when no ``api_url``
        is given.
    conf, supp
        Forwarded to ``spotlight.candidates`` as ``confidence`` and
        ``support``.
    api_url
        Base REST URL; ``"/candidates"`` is appended to it.  When None or
        empty, the default endpoint for ``lang`` is used.

    Returns
    -------
    A list of the annotations that contain a ``'resource'`` key; a single
    resource dict is wrapped in a list.  If the client raises
    ``SpotlightException`` or ``TypeError``, a dict ``{'error': message}``
    is returned instead.

    Raises
    ------
    ValueError
        If no ``api_url`` is given and ``lang`` is not a known language
        code.
    """
    text = fetch(doc)

    endpoints_by_language = {
        'en': "http://spotlight.sztaki.hu:2222/rest",
        'de': "http://spotlight.sztaki.hu:2226/rest",
        'nl': "http://spotlight.sztaki.hu:2232/rest",
        'fr': "http://spotlight.sztaki.hu:2225/rest",
        'it': "http://spotlight.sztaki.hu:2230/rest",
        'ru': "http://spotlight.sztaki.hu:2227/rest",
        'es': "http://spotlight.sztaki.hu:2231/rest",
        'pt': "http://spotlight.sztaki.hu:2228/rest",
        'hu': "http://spotlight.sztaki.hu:2229/rest",
        'tr': "http://spotlight.sztaki.hu:2235/rest"
    }

    # A falsy api_url (None or "") means "use the default for lang".
    # Using one test for both the validity check and the default lookup
    # keeps an empty string from turning into the bare URL "/candidates".
    if not api_url:
        if lang not in endpoints_by_language:
            raise ValueError("Not a valid language code: %r" % lang)
        api_url = endpoints_by_language[lang]

    api_url += "/candidates"

    try:
        spotlight_resp = spotlight.candidates(api_url,
                                              text,
                                              confidence=conf,
                                              support=supp,
                                              spotter='Default')
    except (spotlight.SpotlightException, TypeError) as e:
        # ``e.message`` only exists on Python 2; use str(e) when it is
        # missing or empty so the error path works on Python 3 as well.
        return {'error': getattr(e, 'message', None) or str(e)}

    # Return a list of annotation dictionaries
    annotations = []
    for annotation in spotlight_resp:
        # Ignore annotations without disambiguation candidates
        if u'resource' in annotation:
            # Always return a list of resources, also for single candidates
            if isinstance(annotation[u'resource'], dict):
                annotation[u'resource'] = [annotation[u'resource']]
            annotations.append(annotation)

    return annotations
Beispiel #7
0
def getCandidatesWithSpotligh(text, confidence):
    """Query the ``model.dbpedia-spotlight.org`` English candidates endpoint.

    Calls ``spotlight.candidates`` for *text* with the given *confidence*
    and a fixed support of 2.  Any exception is printed as
    ``"Error: ..."`` and ``None`` is returned instead of the result.
    """
    endpoint = 'http://model.dbpedia-spotlight.org/en/candidates'

    try:
        return spotlight.candidates(endpoint, text,
                                    confidence=confidence, support=2)
    except Exception as err:
        print("Error: " + str(err))

    return None
Beispiel #8
0
def dbpedia_spotlight(doc, lang="en", conf=0.5, supp=0, api_url=None):
    """Run text through a DBpedia Spotlight instance.

    Calls the DBpedia Spotlight instance to perform entity linking and
    returns the names/links it has found.

    See http://spotlight.dbpedia.org/ for details.
    This task uses a Python client for DBp Spotlight:
    https://github.com/aolieman/pyspotlight

    Parameters
    ----------
    doc
        Passed to ``fetch`` to obtain the text to annotate.
    lang
        Language code used to pick the server port when ``api_url`` is
        None.
    conf, supp
        Forwarded to ``spotlight.candidates`` as ``confidence`` and
        ``support``.
    api_url
        Base REST URL; ``"/candidates"`` is appended to it.  When None,
        the URL is built from ``lang``.

    Returns
    -------
    A list of the annotations that contain a ``"resource"`` key, with that
    value always wrapped in a list.  If the client raises
    ``SpotlightException`` or ``TypeError``, a dict ``{"error": message}``
    is returned instead.

    Raises
    ------
    ValueError
        If ``api_url`` is None and ``lang`` is not a known language code.
    """

    if api_url is None:
        server = "http://spotlight.sztaki.hu"

        ports_by_language = {
            "en": 2222,
            "fr": 2225,
            "de": 2226,
            "ru": 2227,
            "pt": 2228,
            "hu": 2229,
            "it": 2230,
            "es": 2231,
            "nl": 2232,
            "tr": 2235,
        }
        if lang not in ports_by_language:
            raise ValueError("Not a valid language code: %r" % lang)

        api_url = server + ":" + str(ports_by_language[lang]) + "/rest"

    api_url += "/candidates"

    text = fetch(doc)

    try:
        spotlight_resp = spotlight.candidates(api_url, text, confidence=conf, support=supp, spotter="Default")
    except (spotlight.SpotlightException, TypeError) as e:
        # Python 3 exceptions have no ``message`` attribute; fall back to
        # str(e) so this handler cannot itself raise AttributeError.
        return {"error": getattr(e, "message", None) or str(e)}

    def ensure_resource_list(annotation):
        # A single candidate is not wrapped in a list; normalise it.
        if not isinstance(annotation[u"resource"], list):
            annotation[u"resource"] = [annotation[u"resource"]]
        return annotation

    # Annotations without a "resource" key are dropped.
    annotations = [ensure_resource_list(annot) for annot in spotlight_resp if u"resource" in annot]

    return annotations
Beispiel #9
0
def test_missing_annotation():
    # The JSON passed through the 'fake_response' header has no
    # "annotation" key.
    # NOTE(review): presumably the client uses this header as the canned
    # server reply -- confirm against the client's test hooks.
    canned_headers = {'fake_response': '{"Test": "Win"}'}
    spotlight.candidates('localhost', 'asdasdasd', headers=canned_headers)
Beispiel #10
0
def test_single_candidate():
    # Regression test for issue #3: the response holds a single surface
    # form (here with two candidate resources).
    # Thanks to aolieman for the awesome test data!
    data = """
{
   "annotation":{
      "@text":"Industrial Design at the Technische Universiteit Delft",
      "surfaceForm":{
         "@name":"Technische Universiteit Delft",
         "@offset":"25",
         "resource":[
            {
               "@label":"Technische Universiteit Delft",
               "@uri":"Technische_Universiteit_Delft",
               "@contextualScore":"0.9991813164782087",
               "@percentageOfSecondRank":"0.1422872887244497",
               "@support":"3",
               "@priorScore":"2.8799662606192636E-8",
               "@finalScore":"0.8754365122251001",
               "@types":""
            },
            {
               "@label":"Delft University of Technology",
               "@uri":"Delft_University_of_Technology",
               "@contextualScore":"8.186418452925803E-4",
               "@percentageOfSecondRank":"0.0",
               "@support":"521",
               "@priorScore":"5.001541405942121E-6",
               "@finalScore":"0.12456348777489806",
               "@types":"DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University"
            }
         ]
      }
   }
}
    """

    # Expected parse result: '@' prefixes stripped and numeric strings
    # converted to numbers.
    dutch_resource = {
        u'label': u'Technische Universiteit Delft',
        u'uri': u'Technische_Universiteit_Delft',
        u'contextualScore': 0.9991813164782087,
        u'percentageOfSecondRank': 0.1422872887244497,
        u'support': 3,
        u'priorScore': 2.8799662606192636e-08,
        u'finalScore': 0.8754365122251001,
        u'types': u'',
    }
    english_resource = {
        u'label': u'Delft University of Technology',
        u'uri': u'Delft_University_of_Technology',
        u'contextualScore': 0.0008186418452925803,
        u'percentageOfSecondRank': 0.0,
        u'support': 521,
        u'priorScore': 5.001541405942121e-06,
        u'finalScore': 0.12456348777489806,
        u'types': u'DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University',
    }
    expected_out = [
        {
            u'name': u'Technische Universiteit Delft',
            u'offset': 25,
            u'resource': [dutch_resource, english_resource],
        },
    ]

    fake_headers = {'fake_response': data}
    candidates = spotlight.candidates('http://localhost', 'asdasdasd',
                                      headers=fake_headers)
    eq_(candidates, expected_out)
Beispiel #11
0
def test_missing_surfaceForms():
    # The canned JSON has an "annotation" object but no "surfaceForm"
    # entry inside it.
    canned_headers = {'fake_response': '{"annotation": {"Test": "Win"}}'}
    spotlight.candidates('http://localhost', 'asdasdasd',
                         headers=canned_headers)
Beispiel #12
0
def candidate(text):
    """Return the ``name`` of every candidate Spotlight reports for *text*.

    Queries the candidates endpoint on ``localhost:2222`` with a
    confidence of 0.30, a support of 30 and the ``"Default"`` spotter,
    then collects ``item.get("name")`` for each returned item.
    """
    response = spotlight.candidates(
        "http://localhost:2222/rest/candidates",
        text,
        confidence=0.30,
        support=30,
        spotter="Default",
    )
    names = []
    for entry in response:
        names.append(entry.get("name"))
    return names
Beispiel #13
0
def test_missing_surfaceForms():
    # An "annotation" object without a "surfaceForm" entry must make the
    # client raise SpotlightException.
    canned_headers = {'fake_response': b'{"annotation": {"Test": "Win"}}'}
    with assert_raises(spotlight.SpotlightException):
        spotlight.candidates('http://localhost', 'asdasdasd',
                             headers=canned_headers)
Beispiel #14
0
def test_single_candidate():
    # Regression test for issue #3: the response holds a single surface
    # form (here with two candidate resources).
    # Thanks to aolieman for the awesome test data!
    data = """
{
   "annotation":{
      "@text":"Industrial Design at the Technische Universiteit Delft",
      "surfaceForm":{
         "@name":"Technische Universiteit Delft",
         "@offset":"25",
         "resource":[
            {
               "@label":"Technische Universiteit Delft",
               "@uri":"Technische_Universiteit_Delft",
               "@contextualScore":"0.9991813164782087",
               "@percentageOfSecondRank":"0.1422872887244497",
               "@support":"3",
               "@priorScore":"2.8799662606192636E-8",
               "@finalScore":"0.8754365122251001",
               "@types":""
            },
            {
               "@label":"Delft University of Technology",
               "@uri":"Delft_University_of_Technology",
               "@contextualScore":"8.186418452925803E-4",
               "@percentageOfSecondRank":"0.0",
               "@support":"521",
               "@priorScore":"5.001541405942121E-6",
               "@finalScore":"0.12456348777489806",
               "@types":"DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University"
            }
         ]
      }
   }
}
    """
    parsed = spotlight.candidates('http://localhost', 'asdasdasd',
                                  headers={'fake_response': data})

    # Expected parse result, assembled piece by piece for readability.
    university_types = u'DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University'
    resources = [
        {
            u'uri': u'Technische_Universiteit_Delft',
            u'label': u'Technische Universiteit Delft',
            u'types': u'',
            u'support': 3,
            u'finalScore': 0.8754365122251001,
            u'priorScore': 2.8799662606192636e-08,
            u'contextualScore': 0.9991813164782087,
            u'percentageOfSecondRank': 0.1422872887244497,
        },
        {
            u'uri': u'Delft_University_of_Technology',
            u'label': u'Delft University of Technology',
            u'types': university_types,
            u'support': 521,
            u'finalScore': 0.12456348777489806,
            u'priorScore': 5.001541405942121e-06,
            u'contextualScore': 0.0008186418452925803,
            u'percentageOfSecondRank': 0.0,
        },
    ]
    surface_form = {
        u'name': u'Technische Universiteit Delft',
        u'offset': 25,
        u'resource': resources,
    }
    eq_(parsed, [surface_form])
Beispiel #15
0
def test_missing_surfaceForms():
    # The canned reply carries an "annotation" object with no
    # "surfaceForm" entry; the client is expected to raise.
    fake_reply = b'{"annotation": {"Test": "Win"}}'
    with assert_raises(spotlight.SpotlightException):
        spotlight.candidates(
            'http://localhost',
            'asdasdasd',
            headers={'fake_response': fake_reply},
        )
Beispiel #16
0
def test_missing_annotation():
    # The canned JSON reply contains no "annotation" key at all.
    fake_reply = '{"Test": "Win"}'
    spotlight.candidates(
        'localhost', 'asdasdasd', headers={'fake_response': fake_reply})
Beispiel #17
0
def test_missing_surfaceForms():
    # The canned JSON reply has an "annotation" object but no
    # "surfaceForm" entry inside it.
    fake_reply = '{"annotation": {"Test": "Win"}}'
    spotlight.candidates('localhost', 'asdasdasd',
                         headers={'fake_response': fake_reply})