def candidate(text):
    """Return the ``name`` of every candidate Spotlight reports for *text*.

    The request goes to the candidates endpoint on localhost port 2222 with
    confidence 0.30, support 30 and the 'Default' spotter. Entries without
    a ``name`` key contribute ``None`` to the result.
    """
    endpoint = 'http://localhost:2222/rest/candidates'
    found = spotlight.candidates(endpoint, text,
                                 confidence=0.30, support=30,
                                 spotter='Default')
    names = []
    for entry in found:
        names.append(entry.get('name'))
    return names
def get_candidates(text):
    """Look up Spotlight candidates for *text*.

    The endpoint, confidence and support come from the names
    ``candidates_host``, ``confidence_level`` and ``support_level``, which
    are resolved from the enclosing module scope.

    Returns whatever ``spotlight.candidates`` returns, or the string
    ``"No candidates"`` when it raises ``spotlight.SpotlightException``.
    """
    try:
        return spotlight.candidates(candidates_host, text,
                                    confidence=confidence_level,
                                    support=support_level)
    except spotlight.SpotlightException:
        # The exception object is not needed; the sentinel string is kept
        # so existing callers that compare against it keep working.
        return "No candidates"
def dbpedia_spotlight(doc, lang='en', conf=0.5, supp=0, api_url=None):
    """Run text through a DBpedia Spotlight instance.

    Calls the DBpedia Spotlight instance to perform entity linking and
    returns the names/links it has found.

    See http://spotlight.dbpedia.org/ for details. This task uses a Python
    client for DBp Spotlight: https://github.com/aolieman/pyspotlight

    Parameters
    ----------
    doc
        Passed to ``fetch`` to obtain the text to annotate.
    lang
        Language code selecting a port on the default server. Only
        consulted when ``api_url`` is None.
    conf
        Forwarded to Spotlight as ``confidence``.
    supp
        Forwarded to Spotlight as ``support``.
    api_url
        Base REST URL of a Spotlight instance; ``"/candidates"`` is
        appended to it. When None, a URL is built from ``lang``.

    Returns
    -------
    A list of the annotation dicts that contain a ``resource`` key, each
    with ``resource`` normalised to a list. If the Spotlight call raises
    ``SpotlightException`` or ``TypeError``, a dict ``{'error': message}``
    is returned instead.

    Raises
    ------
    ValueError
        If ``api_url`` is None and ``lang`` is not a known language code.
    """
    if api_url is None:
        server = "http://spotlight.sztaki.hu"
        ports_by_language = {
            'en': 2222, 'fr': 2225, 'de': 2226, 'ru': 2227, 'pt': 2228,
            'hu': 2229, 'it': 2230, 'es': 2231, 'nl': 2232, 'tr': 2235
        }
        if lang not in ports_by_language:
            raise ValueError("Not a valid language code: %r" % lang)
        api_url = server + ':' + str(ports_by_language[lang]) + '/rest'

    api_url += "/candidates"
    text = fetch(doc)

    try:
        spotlight_resp = spotlight.candidates(api_url, text,
                                              confidence=conf,
                                              support=supp,
                                              spotter='Default')
    except (spotlight.SpotlightException, TypeError) as e:
        # Exceptions have no ``message`` attribute on Python 3; reading it
        # unconditionally would raise AttributeError inside this handler.
        return {'error': getattr(e, 'message', None) or str(e)}

    def ensure_resource_list(annotation):
        # A single candidate is not wrapped in a list; wrap it so callers
        # always get a list of resources.
        if not isinstance(annotation[u'resource'], list):
            annotation[u'resource'] = [annotation[u'resource']]
        return annotation

    annotations = [
        ensure_resource_list(annot) for annot in spotlight_resp
        if u'resource' in annot
    ]
    return annotations
def dbpedia_spotlight(doc, lang='en', conf=0.5, supp=0, api_url=None):
    """Run text through a DBpedia Spotlight instance.

    Calls the DBpedia Spotlight instance to perform entity linking and
    returns the names/links it has found.

    See http://spotlight.dbpedia.org/ for details. This task uses a Python
    client for DBp Spotlight: https://github.com/aolieman/pyspotlight

    Parameters
    ----------
    doc
        Passed to ``fetch`` to obtain the text to annotate.
    lang
        Language code selecting a port on the default server. Only
        consulted when ``api_url`` is None.
    conf
        Forwarded to Spotlight as ``confidence``.
    supp
        Forwarded to Spotlight as ``support``.
    api_url
        Base REST URL of a Spotlight instance; ``"/candidates"`` is
        appended to it. When None, a URL is built from ``lang``.

    Returns
    -------
    A list of the annotation dicts that contain a ``resource`` key, each
    with ``resource`` normalised to a list. If the Spotlight call raises
    ``SpotlightException`` or ``TypeError``, a dict ``{'error': message}``
    is returned instead.

    Raises
    ------
    ValueError
        If ``api_url`` is None and ``lang`` is not a known language code.
    """
    if api_url is None:
        server = "http://spotlight.sztaki.hu"
        ports_by_language = {
            'en': 2222, 'fr': 2225, 'de': 2226, 'ru': 2227, 'pt': 2228,
            'hu': 2229, 'it': 2230, 'es': 2231, 'nl': 2232, 'tr': 2235
        }
        if lang not in ports_by_language:
            raise ValueError("Not a valid language code: %r" % lang)
        api_url = server + ':' + str(ports_by_language[lang]) + '/rest'

    api_url += "/candidates"
    text = fetch(doc)

    try:
        spotlight_resp = spotlight.candidates(
            api_url, text,
            confidence=conf,
            support=supp,
            spotter='Default'
        )
    except (spotlight.SpotlightException, TypeError) as e:
        # ``e.message`` only exists on Python 2 exceptions; fall back to
        # str(e) so the error path cannot itself fail with AttributeError.
        error_text = getattr(e, 'message', None) or str(e)
        return {'error': error_text}

    def ensure_resource_list(annotation):
        # Normalise a lone resource into a one-element list.
        if not isinstance(annotation[u'resource'], list):
            annotation[u'resource'] = [annotation[u'resource']]
        return annotation

    # Annotations without a 'resource' key are dropped.
    annotations = [ensure_resource_list(annot)
                   for annot in spotlight_resp
                   if u'resource' in annot]
    return annotations
def dbpedia_spotlight(doc, lang='en', conf=0.5, supp=0, api_url=None):
    """Run text through a DBpedia Spotlight instance.

    Calls the DBpedia Spotlight instance to perform entity linking and
    returns the names/links it has found.

    See http://spotlight.dbpedia.org/ for details. This task uses a Python
    client for DBp Spotlight: https://github.com/aolieman/pyspotlight

    Parameters
    ----------
    doc
        Passed to ``fetch`` to obtain the text to annotate.
    lang
        Language code used to pick a default endpoint when ``api_url`` is
        None.
    conf
        Forwarded to Spotlight as ``confidence``.
    supp
        Forwarded to Spotlight as ``support``.
    api_url
        Base REST URL of a Spotlight instance; ``"/candidates"`` is
        appended to it. When None, the endpoint for ``lang`` is used.

    Returns
    -------
    A list of the annotation dicts that contain a ``resource`` key; a
    ``resource`` that is a single dict is wrapped in a list. If the
    Spotlight call raises ``SpotlightException`` or ``TypeError``, a dict
    ``{'error': message}`` is returned instead.

    Raises
    ------
    ValueError
        If ``lang`` is not a known language code and no ``api_url`` was
        supplied.
    """
    text = fetch(doc)

    endpoints_by_language = {
        'en': "http://spotlight.sztaki.hu:2222/rest",
        'de': "http://spotlight.sztaki.hu:2226/rest",
        'nl': "http://spotlight.sztaki.hu:2232/rest",
        'fr': "http://spotlight.sztaki.hu:2225/rest",
        'it': "http://spotlight.sztaki.hu:2230/rest",
        'ru': "http://spotlight.sztaki.hu:2227/rest",
        'es': "http://spotlight.sztaki.hu:2231/rest",
        'pt': "http://spotlight.sztaki.hu:2228/rest",
        'hu': "http://spotlight.sztaki.hu:2229/rest",
        'tr': "http://spotlight.sztaki.hu:2235/rest"
    }

    if lang not in endpoints_by_language and not api_url:
        raise ValueError("Not a valid language code: %r" % lang)

    if api_url is None:
        api_url = endpoints_by_language[lang]

    api_url += "/candidates"

    try:
        spotlight_resp = spotlight.candidates(
            api_url, text,
            confidence=conf,
            support=supp,
            spotter='Default'
        )
    except (spotlight.SpotlightException, TypeError) as e:
        # Python 3 exceptions have no ``message`` attribute; use str(e)
        # when it is absent so this handler cannot raise AttributeError.
        return {'error': getattr(e, 'message', None) or str(e)}

    # Return a list of annotation dictionaries
    annotations = []
    for annotation in spotlight_resp:
        # Ignore annotations without disambiguation candidates
        if u'resource' in annotation:
            # Always return a list of resources, also for single candidates
            if isinstance(annotation[u'resource'], dict):
                annotation[u'resource'] = [annotation[u'resource']]
            annotations.append(annotation)
    return annotations
def dbpedia_spotlight(doc, lang='en', conf=0.5, supp=0, api_url=None):
    """Run text through a DBpedia Spotlight instance.

    Calls the DBpedia Spotlight instance to perform entity linking and
    returns the names/links it has found.

    See http://spotlight.dbpedia.org/ for details. This task uses a Python
    client for DBp Spotlight: https://github.com/aolieman/pyspotlight

    Parameters
    ----------
    doc
        Passed to ``fetch`` to obtain the text to annotate.
    lang
        Language code used to pick a default endpoint when ``api_url`` is
        None.
    conf
        Forwarded to Spotlight as ``confidence``.
    supp
        Forwarded to Spotlight as ``support``.
    api_url
        Base REST URL of a Spotlight instance; ``"/candidates"`` is
        appended to it. When None, the endpoint for ``lang`` is used.

    Returns
    -------
    A list of the annotation dicts that contain a ``resource`` key; a
    ``resource`` that is a single dict is wrapped in a list. If the
    Spotlight call raises ``SpotlightException`` or ``TypeError``, a dict
    ``{'error': message}`` is returned instead.

    Raises
    ------
    ValueError
        If ``lang`` is not a known language code and no ``api_url`` was
        supplied.
    """
    text = fetch(doc)

    endpoints_by_language = {
        'en': "http://spotlight.sztaki.hu:2222/rest",
        'de': "http://spotlight.sztaki.hu:2226/rest",
        'nl': "http://spotlight.sztaki.hu:2232/rest",
        'fr': "http://spotlight.sztaki.hu:2225/rest",
        'it': "http://spotlight.sztaki.hu:2230/rest",
        'ru': "http://spotlight.sztaki.hu:2227/rest",
        'es': "http://spotlight.sztaki.hu:2231/rest",
        'pt': "http://spotlight.sztaki.hu:2228/rest",
        'hu': "http://spotlight.sztaki.hu:2229/rest",
        'tr': "http://spotlight.sztaki.hu:2235/rest"
    }

    if lang not in endpoints_by_language and not api_url:
        raise ValueError("Not a valid language code: %r" % lang)

    if api_url is None:
        api_url = endpoints_by_language[lang]

    api_url += "/candidates"

    try:
        spotlight_resp = spotlight.candidates(api_url, text,
                                              confidence=conf,
                                              support=supp,
                                              spotter='Default')
    except (spotlight.SpotlightException, TypeError) as e:
        # ``message`` is a Python 2-only exception attribute; without the
        # fallback this line raises AttributeError on Python 3.
        error_text = getattr(e, 'message', None) or str(e)
        return {'error': error_text}

    # Return a list of annotation dictionaries
    annotations = []
    for annotation in spotlight_resp:
        # Ignore annotations without disambiguation candidates
        if u'resource' in annotation:
            # Always return a list of resources, also for single candidates
            if isinstance(annotation[u'resource'], dict):
                annotation[u'resource'] = [annotation[u'resource']]
            annotations.append(annotation)
    return annotations
def getCandidatesWithSpotligh(text, confidence):
    """Fetch Spotlight candidates for *text*, or None on any failure.

    Queries the English candidates endpoint at model.dbpedia-spotlight.org
    with the given ``confidence`` and a fixed ``support`` of 2. Any
    exception is printed with an ``Error: `` prefix and swallowed.
    """
    endpoint = 'http://model.dbpedia-spotlight.org/en/candidates'
    try:
        return spotlight.candidates(endpoint, text,
                                    confidence=confidence, support=2)
    except Exception as err:
        print("Error: " + str(err))
        return None
def dbpedia_spotlight(doc, lang="en", conf=0.5, supp=0, api_url=None):
    """Run text through a DBpedia Spotlight instance.

    Calls the DBpedia Spotlight instance to perform entity linking and
    returns the names/links it has found.

    See http://spotlight.dbpedia.org/ for details. This task uses a Python
    client for DBp Spotlight: https://github.com/aolieman/pyspotlight

    Parameters
    ----------
    doc
        Passed to ``fetch`` to obtain the text to annotate.
    lang
        Language code selecting a port on the default server. Only
        consulted when ``api_url`` is None.
    conf
        Forwarded to Spotlight as ``confidence``.
    supp
        Forwarded to Spotlight as ``support``.
    api_url
        Base REST URL of a Spotlight instance; ``"/candidates"`` is
        appended to it. When None, a URL is built from ``lang``.

    Returns
    -------
    A list of the annotation dicts that contain a ``resource`` key, each
    with ``resource`` normalised to a list. If the Spotlight call raises
    ``SpotlightException`` or ``TypeError``, a dict ``{"error": message}``
    is returned instead.

    Raises
    ------
    ValueError
        If ``api_url`` is None and ``lang`` is not a known language code.
    """
    if api_url is None:
        server = "http://spotlight.sztaki.hu"
        ports_by_language = {
            "en": 2222,
            "fr": 2225,
            "de": 2226,
            "ru": 2227,
            "pt": 2228,
            "hu": 2229,
            "it": 2230,
            "es": 2231,
            "nl": 2232,
            "tr": 2235,
        }
        if lang not in ports_by_language:
            raise ValueError("Not a valid language code: %r" % lang)
        api_url = server + ":" + str(ports_by_language[lang]) + "/rest"

    api_url += "/candidates"
    text = fetch(doc)

    try:
        spotlight_resp = spotlight.candidates(
            api_url, text, confidence=conf, support=supp, spotter="Default"
        )
    except (spotlight.SpotlightException, TypeError) as e:
        # Exceptions lost their ``message`` attribute in Python 3; fall
        # back to str(e) so the handler does not raise AttributeError.
        return {"error": getattr(e, "message", None) or str(e)}

    def ensure_resource_list(annotation):
        # Wrap a lone resource so callers always receive a list.
        if not isinstance(annotation[u"resource"], list):
            annotation[u"resource"] = [annotation[u"resource"]]
        return annotation

    annotations = [
        ensure_resource_list(annot)
        for annot in spotlight_resp
        if u"resource" in annot
    ]
    return annotations
def test_missing_annotation():
    # Feed spotlight.candidates a canned JSON body whose top level has no
    # "annotation" key.
    # NOTE(review): presumably a test stub consumes the 'fake_response'
    # header; nothing is asserted in this body, so the expected outcome
    # must be supplied elsewhere (e.g. by a decorator) - confirm.
    fake_headers = {'fake_response': '{"Test": "Win"}'}
    spotlight.candidates('localhost', 'asdasdasd', headers=fake_headers)
def test_single_candidate():
    # Regression test for issue #3: the response holds a single surface
    # form (with two resources) rather than a list of surface forms.
    # Thanks to aolieman for the awesome test data!
    fake_payload = """
    {
       "annotation":{
          "@text":"Industrial Design at the Technische Universiteit Delft",
          "surfaceForm":{
             "@name":"Technische Universiteit Delft",
             "@offset":"25",
             "resource":[
                {
                   "@label":"Technische Universiteit Delft",
                   "@uri":"Technische_Universiteit_Delft",
                   "@contextualScore":"0.9991813164782087",
                   "@percentageOfSecondRank":"0.1422872887244497",
                   "@support":"3",
                   "@priorScore":"2.8799662606192636E-8",
                   "@finalScore":"0.8754365122251001",
                   "@types":""
                },
                {
                   "@label":"Delft University of Technology",
                   "@uri":"Delft_University_of_Technology",
                   "@contextualScore":"8.186418452925803E-4",
                   "@percentageOfSecondRank":"0.0",
                   "@support":"521",
                   "@priorScore":"5.001541405942121E-6",
                   "@finalScore":"0.12456348777489806",
                   "@types":"DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University"
                }
             ]
          }
       }
    }
    """
    actual = spotlight.candidates('http://localhost', 'asdasdasd',
                                  headers={'fake_response': fake_payload})

    # Expected values are the payload's fields with the "@" prefix removed
    # and numeric strings converted to numbers.
    dutch_resource = {
        u'label': u'Technische Universiteit Delft',
        u'uri': u'Technische_Universiteit_Delft',
        u'contextualScore': 0.9991813164782087,
        u'percentageOfSecondRank': 0.1422872887244497,
        u'support': 3,
        u'priorScore': 2.8799662606192636e-08,
        u'finalScore': 0.8754365122251001,
        u'types': u'',
    }
    english_resource = {
        u'label': u'Delft University of Technology',
        u'uri': u'Delft_University_of_Technology',
        u'contextualScore': 0.0008186418452925803,
        u'percentageOfSecondRank': 0.0,
        u'support': 521,
        u'priorScore': 5.001541405942121e-06,
        u'finalScore': 0.12456348777489806,
        u'types': u'DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University',
    }
    expected = [
        {
            u'name': u'Technische Universiteit Delft',
            u'offset': 25,
            u'resource': [dutch_resource, english_resource],
        }
    ]
    eq_(actual, expected)
def test_missing_surfaceForms():
    # Feed spotlight.candidates a canned JSON body whose "annotation"
    # object contains no "surfaceForm" entry.
    # NOTE(review): presumably a test stub consumes the 'fake_response'
    # header; nothing is asserted in this body, so the expected outcome
    # must be supplied elsewhere (e.g. by a decorator) - confirm.
    canned_body = '{"annotation": {"Test": "Win"}}'
    spotlight.candidates('http://localhost', 'asdasdasd',
                         headers={'fake_response': canned_body})
def candidate(text):
    """Return the ``name`` of every candidate Spotlight reports for *text*.

    The request goes to the candidates endpoint on localhost port 2222 with
    confidence 0.30, support 30 and the "Default" spotter. Entries without
    a ``name`` key contribute ``None`` to the result.
    """
    endpoint = "http://localhost:2222/rest/candidates"
    query_options = {"confidence": 0.30, "support": 30, "spotter": "Default"}
    found = spotlight.candidates(endpoint, text, **query_options)

    names = []
    for entry in found:
        names.append(entry.get("name"))
    return names
def test_missing_surfaceForms():
    # An "annotation" object with no "surfaceForm" entry must make
    # spotlight.candidates raise SpotlightException.
    # NOTE(review): presumably a test stub consumes the 'fake_response'
    # header instead of performing a real HTTP request - confirm.
    canned_body = b'{"annotation": {"Test": "Win"}}'
    fake_headers = {'fake_response': canned_body}
    with assert_raises(spotlight.SpotlightException):
        spotlight.candidates('http://localhost', 'asdasdasd',
                             headers=fake_headers)
def test_single_candidate():
    # Regression test for issue #3: the response holds a single surface
    # form (with two resources) rather than a list of surface forms.
    # Thanks to aolieman for the awesome test data!
    fake_payload = """
    {
       "annotation":{
          "@text":"Industrial Design at the Technische Universiteit Delft",
          "surfaceForm":{
             "@name":"Technische Universiteit Delft",
             "@offset":"25",
             "resource":[
                {
                   "@label":"Technische Universiteit Delft",
                   "@uri":"Technische_Universiteit_Delft",
                   "@contextualScore":"0.9991813164782087",
                   "@percentageOfSecondRank":"0.1422872887244497",
                   "@support":"3",
                   "@priorScore":"2.8799662606192636E-8",
                   "@finalScore":"0.8754365122251001",
                   "@types":""
                },
                {
                   "@label":"Delft University of Technology",
                   "@uri":"Delft_University_of_Technology",
                   "@contextualScore":"8.186418452925803E-4",
                   "@percentageOfSecondRank":"0.0",
                   "@support":"521",
                   "@priorScore":"5.001541405942121E-6",
                   "@finalScore":"0.12456348777489806",
                   "@types":"DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University"
                }
             ]
          }
       }
    }
    """
    fake_headers = {'fake_response': fake_payload}
    actual = spotlight.candidates('http://localhost', 'asdasdasd',
                                  headers=fake_headers)

    # Expected values are the payload's fields with the "@" prefix removed
    # and numeric strings converted to numbers.
    expected_resources = [
        {
            u'label': u'Technische Universiteit Delft',
            u'uri': u'Technische_Universiteit_Delft',
            u'types': u'',
            u'support': 3,
            u'contextualScore': 0.9991813164782087,
            u'percentageOfSecondRank': 0.1422872887244497,
            u'priorScore': 2.8799662606192636e-08,
            u'finalScore': 0.8754365122251001,
        },
        {
            u'label': u'Delft University of Technology',
            u'uri': u'Delft_University_of_Technology',
            u'types': u'DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University',
            u'support': 521,
            u'contextualScore': 0.0008186418452925803,
            u'percentageOfSecondRank': 0.0,
            u'priorScore': 5.001541405942121e-06,
            u'finalScore': 0.12456348777489806,
        },
    ]
    expected_surface_form = {
        u'name': u'Technische Universiteit Delft',
        u'offset': 25,
        u'resource': expected_resources,
    }
    eq_(actual, [expected_surface_form])
def test_missing_surfaceForms():
    # spotlight.candidates is expected to raise SpotlightException when
    # the "annotation" object carries no "surfaceForm" entry.
    # NOTE(review): presumably a test stub consumes the 'fake_response'
    # header instead of performing a real HTTP request - confirm.
    request_kwargs = {
        'headers': {'fake_response': b'{"annotation": {"Test": "Win"}}'},
    }
    with assert_raises(spotlight.SpotlightException):
        spotlight.candidates('http://localhost', 'asdasdasd',
                             **request_kwargs)
def test_missing_surfaceForms():
    # Feed spotlight.candidates a canned JSON body whose "annotation"
    # object contains no "surfaceForm" entry.
    # NOTE(review): presumably a test stub consumes the 'fake_response'
    # header; nothing is asserted in this body, so the expected outcome
    # must be supplied elsewhere (e.g. by a decorator) - confirm.
    fake_headers = {'fake_response': '{"annotation": {"Test": "Win"}}'}
    spotlight.candidates('localhost', 'asdasdasd', headers=fake_headers)