예제 #1
0
    def _get_dbpedia_nl_record(self, dbpedia_nl_identifier):
        """Look up a DBPedia_nl record and store the result on this object.

        The identifier may be a plain label, a URL (plain or percent-encoded)
        or a name prefixed with ``dbp:`` / ``dbpedia_nl:``.  It is normalised
        to a resource-style name (last URL segment, prefix removed,
        title-cased when it starts with a lowercase letter, spaces turned
        into underscores), formatted into ``self.DBPEDIA_NL_URL`` and fetched
        with ``self.get``.

        When the response reports at least one hit, the id of the first
        document is resolved through ``DBPedia`` and ``sameAs``; the record
        is stored in ``self`` under the record's first key, with its
        ``"sameAs"`` entry replaced by a ``"same"`` entry.

        Args:
            dbpedia_nl_identifier: Label, URL or prefixed name to look up.

        Returns:
            ``False`` when the normalised identifier is empty or the fetch
            yielded no data; ``None`` otherwise (results live on ``self``).
        """
        if self.debug:
            self.log.info("getting.. %s " % (dbpedia_nl_identifier))

        identifier = dbpedia_nl_identifier
        lowered = identifier.lower()

        # Reduce a URL to its last path segment.  Both http and https are
        # accepted; percent-encoded URLs are split on the encoded slash.
        if "http" in lowered:
            if "http:/" in lowered or "https:/" in lowered:
                identifier = identifier.split("/")[-1].strip()
            elif "%2F" in identifier:
                identifier = identifier.split("%2F")[-1].strip()

        # Drop a namespace prefix, keeping everything after the first colon.
        if identifier.lower().startswith(("dbp:", "dbpedia_nl:")):
            identifier = identifier.split(":", 1)[1]

        # Nothing left to query (e.g. a URL ending in "/" or a bare prefix).
        if not identifier:
            return False

        if identifier[0].islower():
            identifier = identifier.title()
        identifier = identifier.replace(" ", "_").replace("%20", "_")

        name = identifier.strip()
        if "+" in identifier or "_" in identifier:
            # Multi-word identifier: add every '+'- and '_'-separated part
            # as an alternative prefLabel term.
            parts = identifier.split("+") + identifier.split("_")
            query = "".join("+OR+prefLabel:" + part for part in parts)
        else:
            query = "+OR+" + name
        url = self.DBPEDIA_NL_URL % (name, query)

        data = self.get(url)
        if not data:
            if self.debug:
                self.log.info("No DBPedia_nl data for: %s @ %s (via %s)" %
                              (identifier, url, self.backend))
            return False

        if isinstance(data, str):
            data = simplejson.loads(data)

        if data["response"]["numFound"] > 0:
            doc_id = data["response"]["docs"][0]["id"].split("/")[-1]
            record = DBPedia([doc_id],
                             backend=self.backend,
                             log_path=self.log_path,
                             debug=self.debug)
            record.execute()

            # list(...) keeps this working when keys()/values() return views.
            key = list(record.keys())[0]
            self[key] = record[key]

            same_as = self[key].pop("sameAs")[0]
            rec = sameAs([same_as],
                         backend=self.backend,
                         log_path=self.log_path,
                         debug=self.debug)
            rec.execute()
            self[key]["same"] = list(rec.values())[0]["same"]
예제 #2
0
 def __init__(self):
     """Create the helper objects this instance works with.

     Builds one ``DBPedia``, one ``SynonymsBase`` and one ``Tokenizer``,
     each with its default constructor arguments, and keeps them as
     instance attributes.
     """
     # NOTE(review): the roles below are inferred from the class names
     # only; confirm against the DBPedia / SynonymsBase / Tokenizer
     # definitions, which are not visible here.
     self.dbpedia = DBPedia()
     self.synonyms_base = SynonymsBase()
     self.tokenizer = Tokenizer()