コード例 #1
0
ファイル: tests.py プロジェクト: vc2014/pyspotlight
def test_protocol_missing():
    """Call ``spotlight.annotate`` with the scheme-less address 'localhost'.

    The request is sent with ``fake_response``/``fake_status`` headers
    (an invalid JSON body and status 502).  This function itself makes no
    assertion about the outcome.
    """
    fake_headers = {
        'fake_response': 'invalid json',
        'fake_status': 502
    }
    spotlight.annotate('localhost', 'asdasdasd', headers=fake_headers)
コード例 #2
0
def check_spotlight(tweets_list):
    """Select tweets whose text and title share usable surface forms.

    Both the ``'text'`` and ``'title'`` of every tweet are annotated with
    Spotlight.  Surface forms found in both are kept when
    ``Candidate_selector`` accepts them or when they consist of digits only;
    values from ``get_number(text, title)`` are appended unless already
    present.  A tweet with at least one candidate gets a ``'blank_cand'``
    entry (candidates joined with ``'_'``) and is added to the result.

    Args:
        tweets_list: list of tweet dicts with ``'text'`` and ``'title'``
            keys.  Selected dicts are modified in place.

    Returns:
        The list of tweet dicts that received a ``'blank_cand'`` entry.
    """
    quiz_cand_list = []
    for tweet in tweets_list:
        text = tweet['text']
        title = tweet['title']
        try:
            annotations_text = spotlight.annotate(spotlight_server, text)
            annotations_title = spotlight.annotate(spotlight_server, title)

            text_surfaceform = {word['surfaceForm'] for word in annotations_text}
            title_surfaceform = {word['surfaceForm'] for word in annotations_title}
            number_set = get_number(text, title)
            blank_list = [
                word for word in text_surfaceform & title_surfaceform
                if Candidate_selector(word) or word.isdigit()
            ]
            for word in number_set:
                if word not in blank_list:
                    blank_list.append(word)
        except Exception:
            # Best effort: a tweet that cannot be annotated or analysed is
            # skipped.  Unlike a bare ``except`` this no longer swallows
            # KeyboardInterrupt / SystemExit.
            continue
        if blank_list:
            tweet['blank_cand'] = '_'.join(blank_list)
            quiz_cand_list.append(tweet)
    return quiz_cand_list
コード例 #3
0
ファイル: tests.py プロジェクト: pablomendes/pyspotlight
def test_http_fail():
    """Call ``spotlight.annotate`` on 'localhost' with a fake 502 response.

    The ``fake_response``/``fake_status`` headers carry an invalid JSON
    body and status 502.  This function itself makes no assertion about
    the outcome.
    """
    fake_headers = {
        'fake_response': 'invalid json',
        'fake_status': 502
    }
    spotlight.annotate('localhost', 'asdasdasd', headers=fake_headers)
コード例 #4
0
ファイル: tests.py プロジェクト: zolekode/pyspotlight
def test_http_fail():
    """Expect ``HTTPError`` when annotating with a fake 502 response."""
    fake_headers = {
        'fake_response': b'invalid json',
        'fake_status': 502
    }
    with assert_raises(spotlight.requests.exceptions.HTTPError):
        spotlight.annotate('http://localhost', 'asdasdasd',
                           headers=fake_headers)
コード例 #5
0
ファイル: tests.py プロジェクト: zolekode/pyspotlight
def test_protocol_missing():
    """Expect ``SpotlightException`` for the scheme-less address 'localhost'."""
    fake_headers = {
        'fake_response': b'invalid json',
        'fake_status': 502
    }
    with assert_raises(spotlight.SpotlightException):
        spotlight.annotate('localhost', 'asdasdasd', headers=fake_headers)
コード例 #6
0
def _fill_entity_slot(slot, annotation):
    """Copy URI, filtered DBpedia types and offset of *annotation* into *slot*."""
    slot['uri'] = annotation['URI'].encode('utf-8')
    slot['type'] = [
        t.encode('utf-8') for t in annotation["types"].split(',')
        if t.startswith("DBpedia") and t != "DBpedia:Agent"
    ]
    slot['offset'] = annotation['offset']


def get_entities():
    """Tag the subject and object of every row of the global ``df``.

    For each row, ``x['sub']`` and ``x['obj']`` are annotated with Spotlight.
    An annotation whose surface form equals the subject (or object) text
    supplies that side's URI, DBpedia types (``DBpedia:Agent`` excluded) and
    offset.  A side whose type is still ``None`` falls back to
    ``types_dict``, keyed by the matching half of the row's ``type`` value
    split on ``"-"``.

    One ``(sub_uri, sub_type, sub_offset, obj_uri, obj_type, obj_offset)``
    tuple per row is appended to the global ``entities`` list, and the
    global ``COUNTER`` is incremented per row.  Errors are printed and the
    row is still recorded with whatever was resolved so far.
    """
    global df
    global COUNTER
    global entities
    for _, x in df.iterrows():

        if COUNTER % 10 == 0:
            print("%s documents tagged" % COUNTER)

        COUNTER += 1

        sub = {"uri": None, "type": None, "offset": None}
        obj = {"uri": None, "type": None, "offset": None}

        try:
            es = spotlight.annotate(SPOTLIGHT_URL, x['sub'], SPOTLIGHT_CONF,
                                    SPOTLIGHT_SUPPORT)
            eo = spotlight.annotate(SPOTLIGHT_URL, x['obj'], SPOTLIGHT_CONF,
                                    SPOTLIGHT_SUPPORT)

            for annotation in es + eo:
                if annotation["surfaceForm"] == x['sub']:
                    _fill_entity_slot(sub, annotation)

                if annotation["surfaceForm"] == x['obj']:
                    _fill_entity_slot(obj, annotation)

        except Exception as err:
            # ``.message`` only exists on Python 2 exceptions and may be
            # empty; fall back to the exception itself.
            print(getattr(err, 'message', None) or err)

        try:
            kinds = x.type.split("-")
            if sub['type'] is None and kinds[0] in types_dict:
                sub['type'] = types_dict[kinds[0]]

            if obj['type'] is None and kinds[1] in types_dict:
                obj['type'] = types_dict[kinds[1]]
        except Exception as err:
            print(getattr(err, 'message', None) or err)

        entities.append((sub['uri'], sub['type'], sub['offset'], obj['uri'],
                         obj['type'], obj['offset']))
コード例 #7
0
def get_entities_link(str_, language):
    """Annotate *str_* and return the annotations as a JSON string.

    The global ``count_`` is incremented and printed on every call.
    A float input is replaced by the empty string; any other input is
    passed through ``cleaner.clean_html``, HTML-escaped, ASCII-encoded with
    XML character references and URL-quoted before annotation.

    Args:
        str_: text to annotate (or a float, which is treated as no text).
        language: ``"english"`` posts to the local Dexter REST service and
            uses its ``'spots'`` list; ``"german"`` and ``"portuguese"``
            use the DBpedia Spotlight API.  Any other value yields no
            annotations.

    Returns:
        ``json.dumps`` of the annotation list (``"[]"`` on any error).
    """
    import requests
    global count_
    count_ += 1
    print(count_)
    annotations = []
    # Floats are not text; isinstance also covers float subclasses, which
    # the previous exact-type comparison let through to the HTML cleaner.
    # NOTE(review): presumably these are NaN cells from missing data - confirm.
    if isinstance(str_, float):
        str_ = ""
    else:
        str_ = cleaner.clean_html(str_)
        str_ = urllib.quote_plus(
            cgi.escape(str_).encode('ascii', 'xmlcharrefreplace'))

    spotlight_endpoints = {
        "german": 'http://api.dbpedia-spotlight.org/de/annotate',
        "portuguese": 'http://api.dbpedia-spotlight.org/pt/annotate',
    }

    try:
        if language == "english":
            url = 'http://localhost:8080/dexter-webapp/api/rest/annotate'
            params = {'min-conf': '0.4', 'text': str_}
            response = requests.post(url, data=params)
            data = json.loads(response.text)
            annotations = data['spots']
        elif language in spotlight_endpoints:
            annotations = spotlight.annotate(
                spotlight_endpoints[language],
                str_,
                confidence=0.4,
                support=20)
    except Exception:
        # Best effort: any failure yields an empty annotation list.
        annotations = []
        print("error trying to annotate text= ")
    return json.dumps(annotations)
コード例 #8
0
def query_spotlight(key, text, failed=None):
    """Annotate *text* and extract the configured fields per surface form.

    Sleeps ``config.API_LIMIT`` seconds after every request, whether it
    succeeded or not.

    Args:
        key: identifier of the text, recorded together with it on failure.
        text: text to annotate.
        failed: optional list that receives ``(key, text)`` when the request
            fails.  Defaults to a fresh list per call (previously a single
            list object was shared by all calls that omitted it).

    Returns:
        ``(True, relevant)`` where ``relevant`` is a list of
        ``{surfaceForm: [entry[f] for f in config.RELEVANT_SPOTLIGHTS]}``
        dicts, or ``(False, [])`` when the request failed or returned
        nothing.
    """
    if failed is None:
        failed = []
    try:
        res = spotlight.annotate(config.SPOTLIGHT_URL,
                                 text,
                                 confidence=config.CONFIDENCE,
                                 support=config.SUPPORT)
    except Exception:
        res = False
        failed.append((key, text))
    finally:
        # Rate limiting applies to successful and failed requests alike.
        time.sleep(config.API_LIMIT)

    if not res:
        return (False, [])

    relevant = [
        {entry['surfaceForm']: [entry[x] for x in config.RELEVANT_SPOTLIGHTS]}
        for entry in res
    ]
    return (True, relevant)
コード例 #9
0
ファイル: dbpedia.py プロジェクト: sroecker/simple_NER
 def annotate(self, text):
     """Yield an ``Entity`` for every ``DBpedia:`` type of each annotation.

     *text* is annotated via ``spotlight.annotate`` using ``self.host``,
     ``self.confidence`` and ``self.support``.  An annotation with several
     ``DBpedia:`` types produces one entity per type; the entity type is
     the part after the last colon and its confidence is the annotation's
     similarity score.
     """
     found = spotlight.annotate(self.host,
                                text,
                                confidence=self.confidence,
                                support=self.support)
     for ann in found:
         for type_name in ann["types"].split(","):
             if not type_name.startswith("DBpedia:"):
                 continue
             details = {
                 "uri": ann["URI"],
                 "support": ann["support"],
                 "offset": ann["offset"],
                 "percentageOfSecondRank": ann["percentageOfSecondRank"],
                 "similarityScore": ann["similarityScore"],
                 "types": ann["types"].split(","),
             }
             yield Entity(ann["surfaceForm"],
                          type_name.split(":")[-1],
                          source_text=text,
                          data=details,
                          confidence=ann["similarityScore"])
コード例 #10
0
    def run(self, document):
        """
        Annotate each sentence of the document with Spotlight and append an
        Entity per annotation to ``document.entities``.  Entity boundaries
        are sentence start + annotation offset; a sentence whose request
        fails contributes no entities.

        :param document: Document object
        :return: Document after being annotated
        """
        for start, end in document.sentences_boundaries:
            try:
                found = spotlight.annotate(self.spotlight_url,
                                           document.text[start:end],
                                           self.confidence, self.support)
            except Exception:
                found = []

            for ann in found:
                # Offsets are relative to the sentence; shift to document.
                e_start = start + ann['offset']

                # Non-string surface forms are stored back as strings.
                if type(ann['surfaceForm']) not in (str, unicode):
                    ann['surfaceForm'] = str(ann['surfaceForm'])
                surface = ann['surfaceForm']

                document.entities.append(
                    Entity(ann['URI'],
                           boundaries=(e_start, e_start + len(surface)),
                           surfaceform=surface,
                           annotator=self.annotator_name))

        return document
コード例 #11
0
def dbpedia_get(pdf_file_name):
    """Print the DBpedia types Spotlight assigns to each sentence entry.

    If the JSON folder for *pdf_file_name* is empty,
    ``get_create_json_images`` is called first.  The sentences come from
    ``read_results``; for every entry in ``sentence[1]`` the text
    ``entry[0]`` is printed and then annotated with a whitelist filter on
    ``DBpedia:Location`` / ``DBpedia:Organization``.  The types of the
    first annotation are printed next to the text.

    Entries of two characters or fewer, entries containing the substring
    "no" (case-insensitive), and entries whose annotation fails are
    skipped.

    Args:
        pdf_file_name: name of the PDF whose extracted results are read.
    """
    data_folder = "C:/Users/advai/PycharmProjects/Data/"
    json_path = os.path.join(data_folder + "Jsons/", pdf_file_name + "/")
    if not os.listdir(json_path):
        get_create_json_images(pdf_file_name)
    _, sentences = read_results(pdf_file_name)
    only_place_filter = {
        'policy': "whitelist",
        'types': "DBpedia:Location, DBpedia:Organization",
        'coreferenceResolution': False
    }
    for sentence in sentences:
        for entry in sentence[1]:
            text = entry[0]
            print(text)
            if len(text) <= 2 or "no" in text.lower():
                continue
            try:
                annotations = spotlight.annotate(
                    'http://15.206.75.50/rest/annotate',
                    '{}'.format(text),
                    confidence=0.0,
                    support=0,
                    filters=only_place_filter)
                split_annotations = annotations[0]['types'].split(",")
            except Exception:
                # Best effort: entries that cannot be annotated are skipped.
                continue
            print(text, split_annotations)
コード例 #12
0
def DbpediaResults(txt):
    """Map each annotated surface form to its first DBpedia type.

    *txt* is annotated with Spotlight.  For every annotation the first
    entry of its comma-separated ``types`` that starts with ``DBpedia`` is
    taken, and the part after the colon is stored upper-cased under the
    surface form.

    Args:
        txt: text to annotate.

    Returns:
        dict of surface form -> upper-cased DBpedia type name.  Whatever
        was collected before an error is returned (empty if the request
        itself failed).
    """
    dp_dict = {}
    try:
        ano_dp = spotlight.annotate(
            'http://159.226.125.180:8080/rest/annotate',
            txt,
            confidence=0.4,
            support=20,
            spotter='Default')
        for a in ano_dp:
            # An empty ``types`` string yields no DBpedia entry at all.
            for c in a['types'].split(','):
                if c.startswith('DBpedia'):
                    dp_dict[a['surfaceForm']] = c.split(':')[1].upper()
                    break
    except Exception:
        # Best effort: keep what was collected so far.
        pass
    return dp_dict
コード例 #13
0
def topic_entities(doc):
    """Find named entities in each topic using DBpedia Spotlight.

    Every document returned by ``topic_documents(doc)`` is annotated, and
    the resulting ``(surfaceForm, URI)`` pairs are stored as a list under
    ``doc['topics'][index]['entities']``.  A later annotation with the same
    surface form replaces an earlier one.  A topic whose request raises
    ``SpotlightException`` or ``HTTPError`` gets an empty list.

    Args:
        doc: mapping with a ``'topics'`` sequence; modified in place.

    Returns:
        The same *doc* object.
    """
    url = 'http://model.dbpedia-spotlight.org/en/annotate'

    for index, document in enumerate(topic_documents(doc)):
        try:
            entities = {
                e['surfaceForm']: e['URI']
                for e in spotlight.annotate(url,
                                            document,
                                            confidence=0.5,
                                            support=50)
            }
        except (spotlight.SpotlightException, HTTPError):
            entities = {}
        doc['topics'][index]['entities'] = list(entities.items())

    return doc
コード例 #14
0
def annotations(text):
    """Return ``(subject, 'type', dbpedia_type)`` triplets for *text*.

    The subject is the annotation URI with the DBpedia resource prefix cut
    off; one triplet is produced per ``DBpedia:`` type of the annotation
    (prefix removed).  The raw annotation result is printed.  If the
    request raises ``SpotlightException`` or ``HTTPError`` the result is
    an empty list.
    """
    resource_prefix = 'http://dbpedia.org/resource/'
    type_prefix = 'DBpedia:'
    try:
        annot = spotlight.annotate(spotlightURL,
                                   text,
                                   confidence=0.4,
                                   support=20,
                                   spotter='Default')
    except (spotlight.SpotlightException, requests.exceptions.HTTPError):
        # An empty string iterates as "no annotations".
        annot = ''
    print(annot)

    triplets = []
    for elt in annot:
        subject = elt['URI'][len(resource_prefix):]
        type_names = elt.get('types', '').split(',')
        triplets.extend(
            (subject, 'type', name[len(type_prefix):])
            for name in type_names
            if name.startswith(type_prefix))
    return triplets
コード例 #15
0
def getAnnotations(textcontent, filename):
    """Write the frequently annotated subjects of a transcript to a file.

    *textcontent* is annotated with Spotlight; the last path segment of
    every annotation URI is counted, and each subject seen at least ten
    times is written on its own line to ``./output/<filename>``.
    Afterwards *filename* is removed from the global ``transcripts`` list
    and the number of remaining transcripts is printed.  On any failure a
    message naming the file is printed instead.

    Args:
        textcontent: transcript text to annotate.
        filename: transcript name, also used as the output file name.
    """
    from collections import Counter

    try:
        outputname = os.path.join('./output/', filename)
        annotations = spotlight.annotate(
            'http://spotlight.dbpedia.org/rest/annotate',
            textcontent,
            confidence=0.5,
            support=20)

        counts = Counter(i['URI'].split('/')[-1] for i in annotations)

        # ``with`` closes the file even when a write fails.
        with open(outputname, 'w') as outfile:
            for subject, occurrences in counts.items():
                if occurrences >= 10:
                    outfile.write(subject + '\n')

        transcripts.remove(filename)

        print("%d left to annotate" % len(transcripts))
    except Exception:
        print("sorry, the annotation failed for: %s" % (filename,))
コード例 #16
0
ファイル: WordNet.py プロジェクト: damodamr/ac-webServies
def spotlightSearch(term):
    """Annotate *term* with Spotlight, print and return the annotations.

    Args:
        term: text to annotate.

    Returns:
        The annotation list returned by ``spotlight.annotate``.
    """
    spotlightTerms = []
    words = TextBlob(term).words
    annotations = spotlight.annotate(
        'http://spotlight.sztaki.hu:2222/rest/annotate',
        term,
        confidence=0.5,
        support=20,
        spotter='Default')
    print(annotations)

    # NOTE(review): spotlightTerms (word, first annotation URI, WordNet
    # definition and hypernyms per word) is assembled here but never
    # returned or used - confirm whether it should be part of the result.
    for word in words:
        try:
            spotlightTerms.append(word)
            spotlightTerms.append(annotations[0].get('URI'))
            synset = wn.synset(word + '.n.01')
            spotlightTerms.append(synset.definition())
            spotlightTerms.append(synset.hypernyms())
        except Exception:
            # Words without a usable noun synset (or a missing annotation)
            # are skipped.
            continue

    return annotations

#term = "sound of Massive Attack"
#spotlightSearch(term)
コード例 #17
0
def dbpedia_annoations(inp_db):
    """Return DBpedia resource names in *inp_db* that match one of its words.

    The text is annotated with Spotlight.  For each annotation the last
    path segment of its ASCII-folded URI is the resource name.  A name is
    kept when its lower-cased form is one of the whitespace-separated
    input words, or when (underscores replaced by spaces) it starts with,
    ends with, or is a prefix of one of the lower-cased words.  A single
    trailing ``?`` or ``.`` is stripped from each word first.  The resource
    ``the_who`` is always ignored.

    Args:
        inp_db: input text.

    Returns:
        list of matching resource names in original case; whatever was
        collected before an error is returned (empty if annotation failed).
    """
    restAPI = 'http://api.dbpedia-spotlight.org/en/annotate'
    reqk = []
    inp_word = inp_db.split()
    try:
        annotation = spotlight.annotate(restAPI,
                                        inp_db,
                                        confidence=0.09,
                                        support=20)
        for terms in annotation:
            # Decode back to text instead of parsing the repr of a bytes
            # object (the old ``str(b'...')[0:-1]`` quote-stripping trick).
            ascii_uri = unicodedata.normalize('NFKD', terms['URI']).encode(
                'ascii', 'ignore').decode('ascii')
            resource = ascii_uri.split('/')[-1]
            sem_key = resource.lower()
            if sem_key == 'the_who':
                continue
            if sem_key in inp_word:
                reqk.append(resource)
                continue
            sem_key = sem_key.replace('_', ' ')
            for xs in inp_word:
                if xs[-1] == '?' or xs[-1] == '.':
                    xs = xs[:-1]
                xs = xs.lower()
                if (sem_key.startswith(xs) or xs.startswith(sem_key)
                        or sem_key.endswith(xs)):
                    reqk.append(resource)
                    break
    except Exception:
        # Best effort: no annotation found (or the request failed).
        pass
    return reqk
コード例 #18
0
def get_entities_by_line(nlp, line):
    """Link the entities of *line* and type them via the *nlp* pipeline.

    The line is annotated with Spotlight.  An annotation is kept only when
    its surface form equals ``str(ent)`` of an entity in
    ``nlp(line).entities``; that entity's ``type`` becomes the annotation
    type (the last matching entity wins).

    Args:
        nlp: callable returning an object with an ``entities`` iterable
            whose items expose ``type``.
        line: text to process.

    Returns:
        list of dicts with ``'URI'`` (wrapped in ``<...>``),
        ``'surfaceForm'``, ``'types'``, ``'start'`` and ``'end'``.
        On any error the line is printed and the empty string ``""`` is
        returned (kept for compatibility with existing callers).
    """
    try:
        annotations = spotlight.annotate(
            'http://api.dbpedia-spotlight.org/en/annotate',
            line,
            confidence=0.4,
            support=20)
        if not annotations:
            return []

        # Run the pipeline once per line rather than once per annotation.
        ner_types = {}
        for ent in nlp(line).entities:
            ner_types[str(ent)] = str(ent.type)

        entities = []
        for re_ano in annotations:
            surface_form = re_ano['surfaceForm']
            uri = "<" + re_ano['URI'] + ">"
            entity_type = ner_types.get(surface_form, "")
            if not entity_type:
                continue
            start = re_ano['offset']
            entities.append({
                'URI': uri,
                'surfaceForm': surface_form,
                'types': entity_type,
                'start': start,
                'end': start + len(surface_form),
            })
        return entities
    except Exception:
        print(line)
        print("spotlight exception")
        return ""
コード例 #19
0
def main(input_folder, output_folder):
    """Resolve the location names of every input file to DBpedia URIs.

    Each file in *input_folder* holds one location name per line.  The
    distinct stripped names are annotated with Spotlight using a
    ``DBpedia:Place`` whitelist filter, and for every name with a result a
    tab-separated line ``name, URI, similarityScore`` (first annotation
    only) is written to the same-named file in *output_folder*.  Names
    that cannot be resolved are skipped.

    Args:
        input_folder: directory containing the input files.
        output_folder: directory receiving the output files.  Both may be
            given with or without a trailing path separator.
    """
    only_place_filter = {
        'policy': "whitelist",
        'types': "DBpedia:Place",
        'coreferenceResolution': False
    }
    for file_name in os.listdir(input_folder):
        print("Processing " + file_name + "...")
        with open(os.path.join(input_folder, file_name), 'r') as inp_file:
            location_names_unique = {name.strip() for name in inp_file}
        print(len(location_names_unique))
        with open(os.path.join(output_folder, file_name), 'w') as out_file:
            for location_name in location_names_unique:
                try:
                    dbpedia_output = spotlight.annotate(
                        "http://api.dbpedia-spotlight.org/en/annotate",
                        location_name,
                        # Fresh copy per request, as before.
                        filters=dict(only_place_filter))
                    best = dbpedia_output[0]
                    row = "\t".join([
                        location_name,
                        best['URI'],
                        str(best['similarityScore']),
                    ])
                except Exception:
                    # Best effort: unresolved names are left out.
                    continue
                out_file.write(row + "\n")
コード例 #20
0
def get_linked_entities_spotlight(facts):
    """Attach Spotlight entities and their primary-topic links to each fact.

    The text of every fact is printed and annotated.  The annotations are
    stored with ``fact.set_entities``; for each annotation URI DBpedia is
    queried for ``foaf:isPrimaryTopicOf`` and every returned value is
    passed to ``fact.set_wp_link``.  Facts whose annotation raises
    ``SpotlightException`` are skipped after printing a message.
    """
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    # Spelled out line by line so the query text does not depend on how
    # this function is indented.
    query_template = (
        "\n"
        "                PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n"
        "                SELECT ?isPrimaryTopicOf\n"
        "                WHERE { <%s> foaf:isPrimaryTopicOf ?isPrimaryTopicOf }\n"
        "            "
    )

    for fact in facts:
        print(fact.text)
        try:
            found = spotlight.annotate(
                'http://model.dbpedia-spotlight.org/en/annotate',
                fact.text,
                confidence=0.4,
                support=20)
        except spotlight.SpotlightException:
            print('No annotaions')
            continue
        fact.set_entities(found)
        for annot in found:
            sparql.setQuery(query_template % annot['URI'])
            sparql.setReturnFormat(JSON)
            bindings = sparql.query().convert()["results"]["bindings"]
            for binding in bindings:
                fact.set_wp_link(binding["isPrimaryTopicOf"]["value"])
コード例 #21
0
def annotate_dbpedia(text, confidence, sport):
    """Annotate *text*, persist the entities and substitute them by URI keys.

    For every Spotlight annotation the name and abstract are looked up via
    ``query_dbpedia`` (falling back to the surface form and an empty
    abstract), whole-word occurrences of the surface form in the text are
    replaced by the last segment of the entity URI, and the entity is
    stored with ``persist``.

    Args:
        text: text to annotate.
        confidence: Spotlight confidence threshold.
        sport: passed through to ``persist`` as its first argument.

    Returns:
        ``(text, annotations_list)``: the rewritten text and a list of
        ``"name:type"`` strings (just ``"name"`` when there is no type).
        If Spotlight fails, the error is printed and whatever was processed
        so far is returned.
    """
    annotations_list = []
    try:
        annotations = spotlight.annotate(
            'https://api.dbpedia-spotlight.org/en/annotate',
            text,
            confidence=confidence)
        for annotation in annotations:
            uri = annotation['URI']
            surface_form = annotation['surfaceForm']
            # Take the most specific type (last in the list) and convert it
            # from CamelCase to space-separated words.
            most_specific = annotation['types'].rsplit(',', 1)[-1].rsplit(':', 1)[-1]
            entity_type = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", most_specific)
            # Look up name and abstract on DBpedia for the identified URI.
            try:
                name, abstract = query_dbpedia(uri)
                if not name:
                    # No name: use the entity's surface form from the text.
                    name = surface_form
                if not abstract:
                    # No abstract: leave the field empty.
                    abstract = ""
            except (TypeError, requests.exceptions.HTTPError) as error:
                print("ERROR: {} {}".format(uri, error))
                name = surface_form
                abstract = ""
            # Keep only the last part of the original URI (lower-cased
            # later, when persisting).
            uri = uri.rsplit('/', 1)[-1]
            # Replace the entity in the text by its URI key.  The surface
            # form is escaped so regex metacharacters in it match literally,
            # and the callable keeps backslashes in the URI from being read
            # as group references.
            pattern = r'\b%s\b' % re.escape(str(surface_form))
            text = re.sub(pattern, lambda _match, key=uri: key, text)
            if entity_type:
                annotations_list.append(name + ":" + entity_type)
            else:
                annotations_list.append(name)
            # Store the identified entity in the DB.
            persist(sport, uri.lower(), name, abstract, entity_type)
    except (spotlight.SpotlightException, requests.exceptions.HTTPError) as error:
        print("DBPEDIA ERROR: {}".format(error))
    return text, annotations_list
コード例 #22
0
    def get_named_entities(self, entity_type="PERSON"):
        """Return the person names Spotlight finds in ``self.text``.

        An annotation counts as a person when its types include
        ``Schema:Person``, ``DBpedia:Person`` or the FOAF Person type.
        Names are upper-cased and reduced to ASCII (NFKD-normalised,
        non-ASCII characters dropped).

        Args:
            entity_type: not used by this implementation.

        Returns:
            A set of normalised names, or an empty list when the
            annotation request fails (return type kept for compatibility
            with existing callers).
        """
        try:
            annotations = spotlight.annotate('http://model.dbpedia-spotlight.org/pt/annotate',
                                             self.text,
                                             confidence=0.4, support=20)
        except (ValueError,
                spotlight.SpotlightException,
                requests.exceptions.HTTPError,
                requests.exceptions.ConnectionError):
            return []

        allowed_types = {"Schema:Person", "DBpedia:Person", "Http://xmlns.com/foaf/0.1/Person"}

        result = set()
        for annotation in annotations:
            types = set(annotation["types"].split(","))

            # Set test instead of reduce(), which is not a builtin on
            # Python 3.
            if types.isdisjoint(allowed_types):
                continue

            graph_node = annotation["surfaceForm"].upper()
            graph_node = normalize('NFKD', graph_node).encode('ASCII', 'ignore').decode('ASCII')
            result.add(graph_node)

        return result
コード例 #23
0
def dbpedia_extract_spans(line):
    """Return token-index spans of the Spotlight annotations found in *line*.

    The stripped line is split on single spaces.  An annotation becomes a
    span ``{'st', 'ed', 'text'}`` when its character offset is a key of
    ``get_offset_to_index_dict(text)`` and the tokens ``st:ed`` joined by
    spaces equal its surface form.  ``validate_nps`` is called on the
    result, but its outcome is not acted upon.  Spotlight/HTTP errors are
    ignored; any other exception is printed.  In both cases the spans
    collected so far are returned.
    """
    validate = True
    threshold = 0.5
    text = line.strip()
    tokens = text.split(' ')
    nps = []
    try:
        token_offset_to_index = get_offset_to_index_dict(text)
        found = spotlight.annotate('http://localhost:2222/rest/annotate',
                                   line,
                                   confidence=threshold)
        for annotation in found:
            offset = annotation['offset']
            surfaceForm = annotation['surfaceForm']
            extra_tokens = len(re.findall(' ', surfaceForm))
            try:
                st = token_offset_to_index[offset]
            except KeyError:
                # Annotation does not start on a token boundary.
                continue
            ed = st + extra_tokens + 1
            if ' '.join(tokens[st:ed]) == surfaceForm:
                nps.append({'st': st, 'ed': ed, 'text': surfaceForm})
        if validate and not validate_nps(nps, tokens):
            pass
    except (SpotlightException, HTTPError):
        pass
    except Exception as err:
        print(err)
    return nps
コード例 #24
0
def get_linked_entity(text, confidence=0.5):
    """Annotate *text* with Spotlight and return the annotations unchanged.

    Uses support 20 and the 'Default' spotter; *confidence* is passed as
    the third positional argument of ``spotlight.annotate``.
    """
    endpoint = 'http://api.dbpedia-spotlight.org:2226/rest/annotate'
    return spotlight.annotate(endpoint,
                              text,
                              confidence,
                              support=20,
                              spotter='Default')
コード例 #25
0
ファイル: getMatches.py プロジェクト: mfazzi/WishMatcher
def findMatchesFromDBPedia(requestParameter):
    """Search Lucene for the type of the first Spotlight annotation.

    The text after the last colon of the first annotation's ``types``
    string is printed and passed to ``searchLucene``; the number of
    matches is printed and the matches are returned.
    """
    annotations = spotlight.annotate(
        'http://spotlight.sztaki.hu:2222/rest/annotate',
        requestParameter,
        confidence=0.4,
        support=0,
        spotter='Default')
    type_string = annotations[0]['types']
    last_type = type_string[type_string.rfind(':') + 1:]
    print(last_type)
    searchMatches = searchLucene(last_type)
    print(len(searchMatches))
    return searchMatches
コード例 #26
0
def get_dbp_id(text, confidence=0.4, support=20):
    """Return the URIs of all Spotlight annotations of *text*, in order."""
    found = spotlight.annotate(
        'http://spotlight.dbpedia.org/rest/annotate',
        text,
        confidence=confidence,
        support=support)
    return [entry['URI'] for entry in found]
コード例 #27
0
def extract_concepts(text):
    """Return the Spotlight annotations of *text*, or ``[]`` on any error."""
    endpoint = "http://127.0.0.1:2229/rest/annotate"
    try:
        concepts = spotlight.annotate(endpoint,
                                      text,
                                      confidence=0.5,
                                      support=100)
    except Exception:
        concepts = []
    return concepts
コード例 #28
0
 def post(self):
     """Annotate the ``data`` field of the JSON request and reply with JSON.

     The annotations are serialised with ``json.dumps`` and returned as a
     status-200 ``application/json`` response.
     """
     annotated = spotlight.annotate(
         'https://api.dbpedia-spotlight.org/en/annotate',
         request.json.get('data'))
     body = json.dumps(annotated)
     return app.response_class(response=body,
                               status=200,
                               mimetype='application/json')
コード例 #29
0
def process_spotlight_api(text):
    """Return Spotlight entities of *text* that do not lie inside a URL.

    Args:
        text: text to annotate.

    Returns:
        list of entity dicts with ``label``, ``startOffset``,
        ``endOffset``, ``confidence``, ``provenance`` and ``types``.
        ``types`` is a one-element list holding the list of per-type dicts
        (``typeURI``, ``typeLabel``, ``entityURI``, ``confidence``,
        ``wikiURI``).  An empty list is returned when annotation fails.
    """
    try:
        entities = spotlight.annotate(
            "http://spotlight.dbpedia.org/rest/annotate",
            text,
            confidence=0.1,
            support=0
        )
    except Exception:
        return []

    link_matches = HyperLink.extract_all_url(text)

    initial_entities = []
    for entity in entities:
        # A quote-count offset correction used to sit here behind
        # ``if occ is not 0`` with ``occ = 0``; it could never run (and
        # referenced a misspelled key), so offsets are used as returned.
        start = entity["offset"]
        end = start + len(entity["surfaceForm"])

        # Entities fully covered by a detected URL are dropped.
        inside_link = any(
            link_match["start"] <= start and link_match["end"] >= end
            for link_match in link_matches)
        if inside_link:
            continue

        types = []
        for data_type in entity["types"].split(","):
            link = data_type
            if "DBpedia:" in data_type:
                link = "http://en.dbpedia.org/resource/" + data_type.split(":")[1]
            if "Freebase:" in data_type:
                link = "http://www.freebase.com" + data_type.split(":")[1]

            types.append({
                "typeURI": None,
                "typeLabel": data_type,
                "entityURI": link,
                "confidence": entity["similarityScore"],
                "wikiURI": DbpediaLink.get_english_wikipedia_link_from_english_resource(link)
            })

        initial_entities.append({
            "label": entity["surfaceForm"],
            "startOffset": start,
            "endOffset": end,
            "confidence": entity["similarityScore"],
            "provenance": "dbpediaspotlight",
            # NOTE(review): the type list is nested inside another list, as
            # in the original output - confirm consumers expect this shape.
            "types": [types]
        })

    return initial_entities
コード例 #30
0
def retrieve_entities(text):
    """Annotate *text* on ``host`` with confidence 0 and support 0.

    Returns whatever ``spotlight.annotate`` returns, using the 'Default'
    spotter.
    """
    options = {'confidence': 0, 'support': 0, 'spotter': 'Default'}
    return spotlight.annotate(host, text, **options)
コード例 #31
0
def complexQuery(term):
    """Expand *term* via its prepositional dependencies and search for sounds.

    The dependency parse of *term* is scanned for lines containing
    ``prep``.  For each one the preposition type (between ``_`` and ``(``)
    and the dependent word (between the first space and ``)``, up to the
    first ``-``) are extracted and printed, and ``DBPedia.dpbediaQuery`` is
    called with the dependent word for ``like`` and with the URI of the
    second Spotlight annotation otherwise.  The results of the last
    preposition are then used as search queries against Freesound and
    Jamendo.

    Args:
        term: free-text query.

    Returns:
        ``(freesound_json, jamendo_json)`` for the last query, or
        ``(None, None)`` when no preposition was found or the DBpedia
        query returned nothing (previously this raised ``NameError``).
    """
    annotator = Annotator()
    dep_parse = annotator.getAnnotations(term, dep_parse=True)['dep_parse']
    dp_list = dep_parse.split('\n')

    spotlightTerms = spotlight.annotate(
        'http://spotlight.sztaki.hu:2222/rest/annotate',
        term,
        confidence=0.3,
        support=20,
        spotter='Default')

    results = []
    for prep in dp_list:
        if "prep" not in prep:
            continue
        prepType = prep[prep.find("_") + 1:prep.find("(")]
        print("We found preposition1: %s" % prepType)
        dependent = prep[prep.find(" ") + 1:prep.find(")")]
        print("We found preposition2: %s" % dependent)
        secondDep = dependent.split("-")
        print(secondDep[0])
        if prepType == "like":
            results = DBPedia.dpbediaQuery(prepType, secondDep[0])
        else:
            results = DBPedia.dpbediaQuery(prepType,
                                           spotlightTerms[1].get('URI'))
        print(results)

    # NOTE(review): the API token and client id below are hard-coded
    # credentials - consider moving them to configuration.
    # NOTE(review): every result is fetched, but only the responses for the
    # last one are returned - confirm this is intended.
    test = test2 = None
    for result in results or []:
        test = json.load(
            urllib2.urlopen(
                "http://www.freesound.org/apiv2/search/text/?query=" + result +
                "&token=06mS7W2OiXidVC2tQ4ikMfe3nomU7rBptaJgBCvp"))
        test2 = json.load(
            urllib2.urlopen(
                "https://api.jamendo.com/v3.0/tracks/?client_id=4cb8fab9&format=jsonpretty&name="
                + result))

    print(test)

    return test, test2
コード例 #32
0
 def get_response(self, text):
     """Annotate *text* using ``self.pars["url"]`` and ``self.pars["minconf"]``.

     A ``SpotlightException`` is printed and ``{}`` is returned.  In the
     code visible here the success path stores the annotations but does
     not return them.

     @rtype: dict
     """
     try:
         annotations = spotlight.annotate(self.pars["url"],
                                          text,
                                          confidence=self.pars["minconf"])
     except spotlight.SpotlightException as msg:
         print("SpotlightException: {}".format(msg))
         return {}
コード例 #33
0
def get_annotations(text):
    """Return the Spotlight annotations of *text*.

    Uses the module-level ``annotations_host``, ``support_level`` and
    ``filters``.  When ``SpotlightException`` is raised the string
    ``"No annotations found"`` is returned instead of a list.
    """
    try:
        # NOTE(review): the confidence is passed negated
        # (``-confidence_level``) - confirm this is intentional.
        found = spotlight.annotate(annotations_host,
                                   text,
                                   confidence=-confidence_level,
                                   support=support_level,
                                   filters=filters)
    except spotlight.SpotlightException:
        return "No annotations found"
    return found
コード例 #34
0
def enrich(graph):
    """Tag each BBC ContentItem in ``graph`` with DBpedia entity URIs.

    For every subject typed as
    ``http://www.bbc.co.uk/search/schema/ContentItem``, each of that
    subject's ``http://schema.org/description`` values is sent to DBpedia
    Spotlight (confidence 0.4, support 20).  Every annotation returned is
    added to the graph as a ``http://www.bbc.co.uk/search/schema/tag``
    triple on the same subject.

    ``graph`` is modified in place and nothing is returned.  Descriptions
    for which Spotlight raises ``SpotlightException`` are skipped.
    """
    content_item = URIRef('http://www.bbc.co.uk/search/schema/ContentItem')
    description = URIRef('http://schema.org/description')
    tag_predicate = URIRef('http://www.bbc.co.uk/search/schema/tag')

    # Snapshot both query results first: triples are added to the same graph
    # inside the loops, and that must not interfere with the iteration.
    items = list(graph.subjects(predicate=RDF.type, object=content_item))
    for uri in items:
        # Restrict to *this* item's descriptions.  Without ``subject=uri``
        # every item would be tagged from every description in the graph.
        descriptions = list(graph.objects(subject=uri, predicate=description))
        for desc in descriptions:
            try:
                annotations = spotlight.annotate(
                    'http://spotlight.dbpedia.org/rest/annotate',
                    str(desc), confidence=0.4, support=20)
            except spotlight.SpotlightException:
                continue
            for tag in annotations:
                graph.add((URIRef(uri), tag_predicate, URIRef(tag['URI'])))
コード例 #35
0
ファイル: TagEntities.py プロジェクト: CaitlinCell/Assessment
def run_query(query):
    """Annotate ``query`` with DBpedia Spotlight, best effort.

    The request uses confidence 0.2 and support 15, passed positionally as
    the third and fourth arguments of ``sp.annotate``.

    Returns whatever ``sp.annotate`` returns, or an empty list (after
    printing a notice) when the call raises.
    """
    confidence = 0.2
    support = 15
    try:
        annotations = sp.annotate('http://spotlight.dbpedia.org/rest/annotate',
                                  query, confidence, support)
    except Exception:
        # Still best-effort for ordinary failures, but no longer swallows
        # KeyboardInterrupt / SystemExit the way a bare ``except:`` did.
        print("No resources returned")
        annotations = []
    return annotations
コード例 #36
0
def getDbpediaMatches(requestParameterSelf, requestParameterForward):
    """Collect matching words from Spotlight and then from WordNet.

    ``requestParameterSelf`` is annotated through the Spotlight endpoint at
    spotlight.sztaki.hu:2222.  From the first annotation's ``'types'``
    string, the part after the last ``':'`` (the whole string if there is no
    colon) seeds the result.  Any failure in that step leaves the seed empty.
    The seed and ``requestParameterForward`` are then handed to
    ``getWordNetMatches`` and its result is returned.
    """
    matchingWords = ""
    try:
        annotations = spotlight.annotate(
            'http://spotlight.sztaki.hu:2222/rest/annotate',
            requestParameterSelf, confidence=0.4, support=0,
            spotter='Default')
        matches = annotations[0]['types']
        typeofline = matches[matches.rfind(':') + 1:]
        matchingWords = matchingWords + typeofline
    except Exception:
        # Deliberately best-effort: fall through with whatever was gathered.
        # Narrowed from a bare ``except:`` so KeyboardInterrupt / SystemExit
        # are no longer swallowed.
        pass
    matchingWords = getWordNetMatches(requestParameterForward, matchingWords)
    return matchingWords
コード例 #37
0
def get_spotlight_annotation(text, lang="fr"):
    """Annotate ``text`` on the Spotlight server for language ``lang``.

    The port is looked up in the module-level ``LANG_PORTS`` mapping and the
    request uses confidence 0.6, support 20 and the ``'Default'`` spotter.

    Returns the annotations, or an empty list after printing a notice and
    the offending text when anything in the lookup or request fails.
    """
    import spotlight
    try:
        # The port lookup stays inside the ``try`` on purpose: an unknown
        # ``lang`` is reported the same way as a failed request.
        endpoint = 'http://spotlight.sztaki.hu:{}/rest/annotate'.format(
            LANG_PORTS[lang])
        annotations = spotlight.annotate(endpoint, text, confidence=0.6,
                                         support=20, spotter='Default')
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt / SystemExit
        # still propagate.
        print("could not get info from spotlight")
        print(text)
        return []
    return annotations
コード例 #38
0
def getAnnotation(text):
    """Link entities in ``text`` with DBpedia Spotlight and AlchemyAPI.

    Returns a 3-tuple ``(entitySet, annotationsSorted, response)``:

    * ``entitySet`` -- the Spotlight URIs plus, for every disambiguated
      Alchemy entity, its dbpedia / geonames / yago / opencyc links;
    * ``annotationsSorted`` -- the Spotlight annotations ordered by
      ascending ``similarityScore``;
    * ``response`` -- the AlchemyAPI entities response, unmodified.
    """
    spotlight_hits = spotlight.annotate(
        'http://spotlight.dbpedia.org/rest/annotate',
        text, confidence=0.25, support=40)
    annotationsSorted = sorted(spotlight_hits,
                               key=lambda hit: hit['similarityScore'])
    spotlight_uris = set(hit['URI'] for hit in annotationsSorted)

    # Example of one Spotlight annotation:
    #   { u'URI': u'http://dbpedia.org/resource/People',
    #     u'offset': 321,
    #     u'percentageOfSecondRank': -1.0,
    #     u'similarityScore': 0.08647863566875458,
    #     u'support': 426,
    #     u'surfaceForm': u'people',
    #     u'types': u'DBpedia:TopicalConcept'}

    alchemyapi = AlchemyAPI()
    response = alchemyapi.entities('text', text, {'sentiment': 1})

    # Example of one Alchemy entity:
    #   {u'count': u'1',
    #    u'disambiguated': {u'dbpedia': u'http://dbpedia.org/resource/Kathmandu',
    #     u'freebase': u'http://rdf.freebase.com/ns/m.04cx5',
    #     u'geo': u'27.716666666666665 85.36666666666666',
    #     u'geonames': u'http://sws.geonames.org/1283240/',
    #     u'name': u'Kathmandu',
    #     u'subType': [u'TouristAttraction'],
    #     u'website': u'http://www.kathmandu.gov.np/',
    #     u'yago': u'http://yago-knowledge.org/resource/Kathmandu'},
    #    u'relevance': u'0.33',
    #    u'sentiment': {u'type': u'neutral'},
    #    u'text': u'Kathmandu',
    #    u'type': u'City'}

    # Only entities that carry a 'disambiguated' section have link data.
    linked_sources = ('dbpedia', 'geonames', 'yago', 'opencyc')
    entitySet = set()
    for entity in response['entities']:
        if 'disambiguated' not in entity:
            continue
        links = entity['disambiguated']
        for source in linked_sources:
            if source in links:
                entitySet.add(links[source])

    entitySet |= spotlight_uris

    return entitySet, annotationsSorted, response
コード例 #39
0
def main():
    """Walk the Solr result set and send field values to DBpedia Spotlight.

    Documents that already have an ``entity_ss`` field are only flagged with
    ``spotlighted_b``; for the others, each value of ``FIELD_TO_ENHANCE`` is
    annotated.

    NOTE(review): the outer ``try`` has no visible handler and ``resp`` is
    never advanced in the visible code, so this excerpt appears truncated.
    """
    entities_recognized = []
    resp = solr_db.select(query)
    DOCS_RETRIEVED = DOCS_PREVIOUSLY_ENHANCED = 0
    n_enhance_attempts = 0
    try:
        while (resp):
            for doc in resp.results:
                DOCS_RETRIEVED += 1
                # Already enhanced: just mark the document and move on.
                if doc.has_key('entity_ss'):
                    DOCS_PREVIOUSLY_ENHANCED +=1
                    doc
                    doc_up = {'id':doc['id'], 'spotlighted_b':{'update':'true'}}
                    try:
                        solr_db.add(doc_up)
                    except SolrException, e:
                        # HTTP 400 from Solr is tolerated; anything else is re-raised.
                        if not e.httpcode == 400:
                            raise e
                    continue
                if doc.has_key(FIELD_TO_ENHANCE):
                    for fvalue in doc[FIELD_TO_ENHANCE]:
                        n_enhance_attempts += 1
                        # Progress line every 100 annotation attempts.
                        if n_enhance_attempts % 100 == 0:
                            print "NDOCS:", str(DOCS_RETRIEVED), ' -> ', FIELD_TO_ENHANCE, fvalue.encode('utf-8')
                        # TODO: run each enhancer, get entity data, then set
                        # the entity_ss field using update syntax
                        try:
                            annotations = spotlight.annotate('http://spotlight.dbpedia.org/rest/annotate', fvalue)
                        except spotlight.SpotlightException, e:
                            # Record every Spotlight error; only "No Resources found"
                            # is treated as non-fatal.
                            exception_resp.append(e)
                            if not "No Resources found" in e.message:
                                print "NUM:", str(DOCS_RETRIEVED), " EEEE->", str(e)
                                print e.args, e.message
                                raise e
                        except ConnectionError, e:
                            # Wait 30 minutes (1800 s), then move on to the next value.
                            time.sleep(1800)
                            continue
                        except HTTPError, e:
                            # TODO: logger
                            continue
コード例 #40
0
ファイル: tumblr-ld-api.py プロジェクト: dogrdon/tumblr-ld
def annotate_posts(text):
    """Return the Spotlight annotations for ``text``.

    Uses the module-level ``DBPEDIA_URL`` endpoint with the module-level
    ``CONFIDENCE`` and ``SUPPORT`` settings.
    """
    return spotlight.annotate(DBPEDIA_URL,
                              text,
                              confidence=CONFIDENCE,
                              support=SUPPORT)
コード例 #41
0
ファイル: tests.py プロジェクト: Zezo360/pyspotlight
def test_candidates_invalid_json():
    """Call ``annotate`` with a ``fake_response`` header of ``'invalid json'``.

    Presumably the test harness turns the ``fake_*`` headers into the stubbed
    HTTP reply -- confirm against the test setup.  The result is discarded;
    no expectation is asserted inside this block.
    """
    stub_headers = {'fake_response': 'invalid json'}
    spotlight.annotate('http://localhost', 'asdasdasd', headers=stub_headers)
コード例 #42
0
ファイル: tests.py プロジェクト: Zezo360/pyspotlight
def test_missing_resources():
    """Call ``annotate`` with a ``fake_response`` of ``'{"Test": "Win"}'``.

    Presumably the test harness turns the ``fake_*`` headers into the stubbed
    HTTP reply -- confirm against the test setup.  The result is discarded;
    no expectation is asserted inside this block.
    """
    stub_headers = {'fake_response': '{"Test": "Win"}'}
    spotlight.annotate('http://localhost', 'asdasdasd', headers=stub_headers)
コード例 #43
0
ファイル: tests.py プロジェクト: Zezo360/pyspotlight
def test_http_fail():
    """Call ``annotate`` with ``fake_status`` 502 and a non-JSON fake body.

    Presumably the test harness turns the ``fake_*`` headers into the stubbed
    HTTP reply -- confirm against the test setup.  The result is discarded;
    no expectation is asserted inside this block.
    """
    stub_headers = {
        'fake_response': 'invalid json',
        'fake_status': 502,
    }
    spotlight.annotate('http://localhost', 'asdasdasd', headers=stub_headers)
コード例 #44
0
ファイル: tests.py プロジェクト: Zezo360/pyspotlight
def test_protocol_missing():
    """Call ``annotate`` with the scheme-less address ``'localhost'``.

    The same ``fake_*`` headers as the HTTP-failure case are supplied
    (presumably consumed by the test harness -- confirm against the test
    setup).  The result is discarded; no expectation is asserted inside this
    block.
    """
    stub_headers = {
        'fake_response': 'invalid json',
        'fake_status': 502,
    }
    spotlight.annotate('localhost', 'asdasdasd', headers=stub_headers)
コード例 #45
0
ファイル: tests.py プロジェクト: aolieman/pyspotlight
def test_protocol_missing():
    """A scheme-less address must make ``annotate`` raise SpotlightException."""
    stub_headers = {
        'fake_response': b'invalid json',
        'fake_status': 502,
    }
    with assert_raises(spotlight.SpotlightException):
        spotlight.annotate('localhost', 'asdasdasd', headers=stub_headers)
コード例 #46
0
ファイル: dbpediaspotlight.py プロジェクト: AKSW/CSV2RDF-WIKI
 def _recognizeEntities(self, text):
     """Return the annotations the public DBpedia Spotlight service gives for ``text``.

     The request uses a confidence of 0.5 and a support of 20.
     """
     return spotlight.annotate('http://spotlight.dbpedia.org/rest/annotate',
                               text,
                               confidence=0.5,
                               support=20)
コード例 #47
0
ファイル: tests.py プロジェクト: pablomendes/pyspotlight
def test_annotation_invalid_json():
    """Call ``annotate`` on ``'localhost'`` with a non-JSON ``fake_response``.

    Presumably the test harness turns the ``fake_*`` headers into the stubbed
    HTTP reply -- confirm against the test setup.  The result is discarded;
    no expectation is asserted inside this block.
    """
    stub_headers = {'fake_response': 'invalid json'}
    spotlight.annotate('localhost', 'asdasdasd', headers=stub_headers)
コード例 #48
0
ファイル: annotate.py プロジェクト: pablomendes/zika
# Port of the spotlight.sztaki.hu annotation endpoint for each language.
LANG_PORTS = {
    "english": '2222',
    "german": '2226',
    "dutch": '2232',
    "hungarian": '2229',
    "french": '2225',
    "portuguese": '2228',
    "italian": '2230',
    "russian": '2227',
    "turkish": '2235',
    "spanish": '2231'
}

port = LANG_PORTS["english"]
url = "http://spotlight.sztaki.hu:{0}/rest/annotate".format(port)

# Usage: <script> INPUT_JSON OUTPUT_JSON
input_filename = sys.argv[1]
output_filename = sys.argv[2]

# Placeholder value; replaced by the article text once the input is loaded.
text = "This is a test with Berlin"
with open(input_filename, 'r') as f_in, open(output_filename, 'w') as f_out:
    article = json.load(f_in)
    # Title and abstract are annotated together as one text.
    text = " \n ".join((article["title"], article["abstract"]))

    try:
        annotations = spotlight.annotate(url,
                                         text,
                                         spotter="Default",
                                         disambiguator="Default",
                                         confidence=0.5,
                                         support=0)
    except Exception as e:
        # Best effort: log the failure and still write an empty result.
        logging.error("%s %s" % (input_filename, str(e)))
        annotations = []
    json.dump(annotations, f_out, indent=2)
コード例 #49
0
def annotateHTML(html):
	"""Return the list of ``'URI'`` values Spotlight reports for ``html``.

	The text is sent to the public DBpedia Spotlight endpoint with the
	library's default settings; URIs keep the order of the annotations.
	"""
	annotation = spotlight.annotate("http://spotlight.dbpedia.org/rest/annotate", html)
	return [entry['URI'] for entry in annotation]
コード例 #50
0
ファイル: tests.py プロジェクト: aolieman/pyspotlight
def test_candidates_invalid_json():
    """A non-JSON fake response must make ``annotate`` raise ValueError."""
    stub_headers = {'fake_response': b'invalid json'}
    with assert_raises(ValueError):
        spotlight.annotate('http://localhost', 'asdasdasd',
                           headers=stub_headers)
コード例 #51
0
ファイル: tests.py プロジェクト: aolieman/pyspotlight
def test_missing_resources():
    """The fake response ``{"Test": "Win"}`` must raise SpotlightException."""
    stub_headers = {'fake_response': b'{"Test": "Win"}'}
    with assert_raises(spotlight.SpotlightException):
        spotlight.annotate('http://localhost', 'asdasdasd',
                           headers=stub_headers)
コード例 #52
0
ファイル: tests.py プロジェクト: aolieman/pyspotlight
def test_http_fail():
    """A fake status of 502 must surface as ``requests`` HTTPError."""
    stub_headers = {
        'fake_response': b'invalid json',
        'fake_status': 502,
    }
    with assert_raises(spotlight.requests.exceptions.HTTPError):
        spotlight.annotate('http://localhost', 'asdasdasd',
                           headers=stub_headers)
コード例 #53
0
ファイル: DBpediaSpotlight.py プロジェクト: Dauth/CV-Parser
def annotate(text):
    """Return the Spotlight annotations for ``text``.

    Queries the endpoint at spotlight.sztaki.hu:2222 with confidence 0.30,
    support 30 and the ``"Default"`` spotter.
    """
    endpoint = "http://spotlight.sztaki.hu:2222/rest/annotate"
    return spotlight.annotate(endpoint,
                              text,
                              confidence=0.30,
                              support=30,
                              spotter="Default")
コード例 #54
0
ファイル: LODLinkers.py プロジェクト: anukat2015/sematch
 def linking(self, query):
     """Return the ``'URI'`` of every Spotlight annotation found in ``query``.

     The request goes to ``self.uri`` with ``self.confidence`` and
     ``self.support``.
     """
     found = spotlight.annotate(self.uri,
                                query,
                                confidence=self.confidence,
                                support=self.support)
     return [entry['URI'] for entry in found]