Beispiel #1
0
    def get_country_code(self, name):
        """Return ``'wd:'`` plus the ``self.countries`` entry for a country name.

        The name is normalised with ``helpers.strip_spaces`` and lower-cased,
        mapped through a table of known aliases, and looked up in
        ``self.countries``.  When there is no exact key, the best candidate
        reported by ``process.extractOne`` is used if its score exceeds 85.

        Args:
            name: Country name as it appears in the input data.

        Returns:
            ``'wd:' + self.countries[key]`` for the matched key, or ``None``
            when neither lookup succeeds (the normalised name is printed in
            that case).
        """
        name = helpers.strip_spaces(name).lower()

        # convert names from one form to another
        conversions = {
            "america": "unitedstatesofamerica",
            "unitedstates": "unitedstatesofamerica",
            "usa": "unitedstatesofamerica",
            "us": "unitedstatesofamerica",
            "russianfederation": "russia",
            "laopdr": "laos",
            "laopeople'sdemocraticrepublic": "laos",
            "côted'ivoire": "ivorycoast",
            "czechia": "czechrepublic",
            "caboverde": "capeverde",
            "timor-leste": "easttimor",
            "uae": "unitedarabemirates",
            "macao": "macau",
        }
        # any name that mentions Taiwan collapses to the single key "taiwan"
        if "taiwan" in name:
            name = "taiwan"
        name = conversions.get(name, name)

        try:
            return 'wd:' + self.countries[name]
        except KeyError:
            # No exact key: fall back to the closest key by fuzzy score.
            best = process.extractOne(name, list(self.countries.keys()))
            # NOTE(review): presumably fuzzywuzzy/thefuzz, whose extractOne
            # returns None when there are no candidates - guard before
            # indexing so an empty table yields None instead of a TypeError.
            if best is not None and best[1] > 85:
                return 'wd:' + self.countries[best[0]]
            print(name)
            return None
Beispiel #2
0
def get_graph_spec(info):
    """Build the SPARQL INSERT that records a metasource on a source's graph.

    ``info[0]`` is the raw URL and ``info[1]`` the metasource name.  The
    statement only inserts when the graph already holds at least one triple
    and does not yet carry this metasource.

    Returns an empty string when ``helpers.is_bad`` rejects the URL (the URL
    is printed in that case) or when the URL contains no dot.
    """
    raw_url = info[0]
    meta = info[1]

    if helpers.is_bad(raw_url):
        print(raw_url)
        return ''
    if raw_url.find('.') < 0:
        return ''

    quoted = urllib.parse.quote(raw_url)
    graph_clause = ' GRAPH ' + '<http://' + quoted + '>'
    item_iri = '<http://' + quoted + '/item>'
    ms_name = helpers.strip_spaces(meta)

    pieces = [
        "INSERT {", graph_clause, "{", item_iri, "wnp:metasource wni:", ms_name,
        "}}\n    WHERE {FILTER (EXISTS {", graph_clause,
        "{?s ?p ?o} } && \n    NOT EXISTS {", graph_clause,
        "{ ?item wnp:metasource wni:", ms_name, "}})};",
    ]
    return "".join(pieces)
def get_graph_spec(source):
    """Build SPARQL ``INSERT`` updates that add a source's missing properties.

    ``source`` is indexed positionally: 0 country, 1 URL, 2 title,
    3 language, 4 type, 5 title in the native language, 6 paywall flag,
    7 metasource, 8 state, 10 Wikipedia name, 11 redirect and 12 Wikipedia
    link.  Every column that ``helpers.is_bad`` accepts produces one
    ``INSERT ... WHERE {FILTER (NOT EXISTS ...)}`` statement, so a property
    is only written when the graph does not already hold that predicate.

    Returns:
        The concatenated statements, or ``''`` when the URL is rejected by
        ``helpers.is_bad`` (it is printed in that case) or contains no dot.
    """
    if helpers.is_bad(source[1]):
        print(source[1])
        return ''
    # a URL without a dot is treated as invalid
    if '.' not in source[1]:
        return ''

    quoted_url = urllib.parse.quote(source[1])
    url = '<http://' + quoted_url + '>'
    url_item = '<http://' + quoted_url + '/item>'
    graph = " GRAPH " + url

    def insert_if_absent(predicate, value, var):
        """Return one INSERT that only applies while ``predicate`` is unset."""
        return (" INSERT { " + graph + " {" + url_item + " " + predicate +
                " " + value + " }}\n"
                "        WHERE {FILTER (NOT EXISTS {" + graph +
                "{ ?item " + predicate + " ?" + var + "}})} ;")

    def quoted_literal(raw):
        """Render a column as a single-quoted literal via ``helpers.clean``."""
        return "'" + helpers.clean(raw) + "'"

    def metasource_ref(raw):
        """Render a metasource column as a ``wni:`` prefixed name."""
        return "wni:" + helpers.strip_spaces(raw).lower()

    # url
    q = ("INSERT { " + graph + " {" + url_item + " wdt:P1896 '" +
         quoted_url + "' }}\n"
         "            WHERE {FILTER (NOT EXISTS {" + graph +
         "{ ?item wdt:P1896 ?url}})} ;")

    # country
    if not helpers.is_bad(source[0]):
        country_code = get_country_code(source[0])
        if not helpers.is_bad(country_code):
            c = country_code
        else:
            c = helpers.clean(source[0])
        # NOTE(review): the value is wrapped in quotes, so a 'wd:...' code is
        # stored as a string literal rather than a prefixed name - confirm
        # that this is intended.
        q += insert_if_absent("wdt:P17", "'" + c + "'", "country")

    # Remaining columns, in output order.  The native-title filter checks
    # wdt:P1704 (the predicate being inserted); checking wdt:P1448 instead
    # would suppress the native title whenever a regular title exists.
    columns = (
        (2, "wdt:P1448", "title", quoted_literal),
        (3, "wdt:P37", "lang", quoted_literal),
        (4, "wdt:P31", "type", quoted_literal),
        (5, "wdt:P1704", "title_native", quoted_literal),
        (6, "wnp:paywalled", "pw", quoted_literal),
        (7, "wnp:metasource", "ms", metasource_ref),
        (8, "wdt:P131", "state", quoted_literal),
        (10, "wnp:wikipedia-name", "wp_name", quoted_literal),
        (11, "wnp:redirect", "rd", quoted_literal),
        (12, "wnp:wikipedia-page", "wp_page", quoted_literal),
    )
    for index, predicate, var, to_object in columns:
        raw = source[index]
        if not helpers.is_bad(raw):
            q += insert_if_absent(predicate, to_object(raw), var)
    return q
def get_country_code(name):
    """Return ``'wd:'`` plus the ``countries`` entry for *name*.

    The name is normalised with ``helpers.strip_spaces`` and lower-cased
    before the lookup.  A name with no entry is reported on stdout and
    yields the quoted placeholder ``'TODO'``.
    """
    key = helpers.strip_spaces(name).lower()
    try:
        return 'wd:' + countries[key]
    except KeyError as e:
        # report the missing key before handing back the placeholder;
        # a print placed after the return would never run
        print(e)
        return "'TODO'"
Beispiel #5
0
    def overwrite(self, source):
        """Build SPARQL ``DELETE``/``INSERT`` updates for a source's properties.

        ``source`` is indexed positionally: 0 country, 1 URL, 2 title,
        3 language, 4 type, 5 title in the native language, 6 paywall flag,
        7 iterable of metasources, 8 state, 10 Wikipedia name, 11 redirect,
        12 Wikipedia link and, optionally, 14 description.  Columns that
        ``helpers.is_bad`` rejects are skipped.

        Returns:
            The concatenated update statements, or ``''`` when the URL is
            rejected by ``helpers.is_bad`` or contains no dot.
        """
        q = ''

        # check for a valid url
        if helpers.is_bad(source[1]) or '.' not in source[1]:
            return q

        # add url to graph (the graph IRI drops any http:// or https:// prefix)
        url = '<http://' + urllib.parse.quote(source[1].replace(
            "http://", "").replace("https://", "")) + '>'
        # NOTE(review): the item IRI quotes source[1] without removing the
        # scheme, unlike the graph IRI above - confirm whether URLs can
        # arrive with a scheme and which form is intended.
        url_item = '<http://' + urllib.parse.quote(source[1]) + '/item>'
        graph = """ GRAPH """ + url

        # NOTE(review): every statement below uses an empty WHERE {}, so the
        # ?item/?... variables in the DELETE templates are never bound; per
        # SPARQL 1.1 Update such template triples are skipped, which would
        # make the DELETE part a no-op - verify against the endpoint.

        # add url
        match = "{" + graph + "{ ?item wdt:P1896 ?url}}"
        q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item + " wdt:P1896 "
              "" + url + """ }} 
              WHERE {} ;""")

        # add country
        if not helpers.is_bad(source[0]):
            country_code = self.get_country_code(source[0])
            if not helpers.is_bad(country_code):
                c = country_code
            else:
                c = helpers.clean(source[0])
            match = "{" + graph + "{ ?item wdt:P17 ?country}}"
            q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item + " wdt:P17 " + c +
                  """ }} 
              WHERE {} ;""")

        # add title
        if not helpers.is_bad(source[2]):
            match = "{" + graph + "{ ?item wdt:P1448 ?title}}"
            q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item + " wdt:P1448 \'" +
                  helpers.clean_string(source[2]) + """\' }} 
             WHERE {} ;""")

        # add language
        if not helpers.is_bad(source[3]):
            match = "{" + graph + "{ ?item wdt:P37 ?lang}}"
            q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item + " wdt:P37 \'" +
                  helpers.clean_string(source[3]) + """\' }} 
              WHERE {} ;""")

        # add type
        if not helpers.is_bad(source[4]):
            match = "{" + graph + "{ ?item wdt:P31 ?type}}"
            q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item + " wdt:P31 \'" +
                  helpers.clean_string(source[4]) + """\' }} 
              WHERE {} ;""")

        # add title (native language)
        if not helpers.is_bad(source[5]):
            match = "{" + graph + "{ ?item wdt:P1704 ?title}}"
            q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item + " wdt:P1704 \'" +
                  helpers.clean_string(source[5]) + """\'}} 
              WHERE {} ;""")

        # add paywall (the value comes from column 6, the same column that
        # is checked; column 2 is the title)
        if not helpers.is_bad(source[6]):
            match = "{" + graph + "{ ?item wnp:paywalled ?pw}}"
            q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item + " wnp:paywalled \'" +
                  helpers.clean_string(source[6]) + """\' }} 
              WHERE {} ;""")

        # add metasources
        for ms in source[7]:
            if not helpers.is_bad(ms):
                match = "{" + graph + "{ ?item wnp:metasource ?ms}}"
                q += ("DELETE" + match + """
                  INSERT { """ + graph + " {" + url_item +
                      " wnp:metasource wni:" +
                      helpers.strip_spaces(ms).lower() + """ }} 
                  WHERE {} ;""")

        # add state
        if not helpers.is_bad(source[8]):
            match = "{" + graph + "{ ?item wdt:P131 ?state}}"
            q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item + " wdt:P131 \'" +
                  helpers.clean_string(source[8]) + """\' }} 
              WHERE {} ;""")

        # add wikipedia name
        if not helpers.is_bad(source[10]):
            match = "{" + graph + "{ ?item wnp:wikipedia-name ?wp_name}}"
            q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item +
                  " wnp:wikipedia-name \'" + helpers.clean_string(source[10]) +
                  """\' }} 
              WHERE {} ;""")

        # add redirects?
        if not helpers.is_bad(source[11]):
            match = "{" + graph + "{ ?item wnp:redirect ?rd}}"
            q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item + " wnp:redirect \'" +
                  helpers.clean_string(source[11]) + """\' }} 
              WHERE {} ;""")

        # add wikipedia link
        if not helpers.is_bad(source[12]):
            match = "{" + graph + "{ ?item wnp:wikipedia-page ?wp_page}}"
            q += ("DELETE" + match + """
              INSERT { """ + graph + " {" + url_item +
                  " wnp:wikipedia-page \'" + helpers.clean_string(source[12]) +
                  """\' }} 
              WHERE {} ;""")

        # add description
        try:
            if not helpers.is_bad(source[14]):
                match = "{" + graph + "{ ?item wnp:description ?desc}}"
                q += ("DELETE" + match + """
                  INSERT { """ + graph + " {" + url_item +
                      " wnp:description \'" +
                      helpers.clean_string(source[14]) + """\' }} 
                  WHERE {} ;""")
        except IndexError:
            # rows without a description column are accepted as-is
            pass

        return q
Beispiel #6
0
 def get_ms(self, metasources):
     """Render one ``wnp:metasource`` continuation per metasource entry.

     Each entry is normalised with ``helpers.strip_spaces`` and lower-cased,
     then appended as ``;`` + newline + ``wnp:metasource wni:<name>``.  An
     empty iterable gives an empty string.
     """
     lead = ";\n             wnp:metasource wni:"
     return "".join(
         lead + helpers.strip_spaces(entry).lower() for entry in metasources
     )
Beispiel #7
0
    def no_overwrite(self, source):
        """Build SPARQL ``INSERT`` updates that only add missing properties.

        ``source`` is indexed positionally: 0 country, 1 URL, 2 title,
        3 language, 4 source type, 5 title in the native language, 6 paywall
        flag, 7 metasource, 8 state, 10 Wikipedia name, 11 redirect,
        12 Wikipedia link and, optionally, 14 description.  Every column that
        ``helpers.is_bad`` accepts produces one
        ``INSERT ... WHERE {FILTER (NOT EXISTS ...)}`` statement, so existing
        values in the graph are left untouched.

        Returns:
            The concatenated statements, or ``''`` when the URL is rejected
            by ``helpers.is_bad`` (it is printed in that case) or contains no
            dot.
        """
        if helpers.is_bad(source[1]):
            print(source[1])
            return ''
        # this means our url is not valid
        if '.' not in source[1]:
            return ''

        # begin constructing graph spec
        quoted_url = urllib.parse.quote(source[1])
        # construct item
        item = '<http://' + quoted_url + '/item>'
        # construct item URL
        url = '<http://' + quoted_url + '>'
        # construct graph value
        graph = " GRAPH " + url

        def insert_if_absent(predicate, value, var):
            """Return one INSERT that only applies while ``predicate`` is unset."""
            return (" INSERT { " + graph + " {" + item + " " + predicate +
                    " " + value + " }}\n"
                    "            WHERE {FILTER (NOT EXISTS {" + graph +
                    "{ ?item " + predicate + " ?" + var + "}})} ;")

        def quoted_literal(raw):
            """Render a column as a single-quoted literal via ``helpers.clean``."""
            return "'" + helpers.clean(raw) + "'"

        def metasource_ref(raw):
            """Render a metasource column as a ``wni:`` prefixed name."""
            return "wni:" + helpers.strip_spaces(raw).lower()

        # add URL
        q = ("INSERT { " + graph + " {" + item + " wdt:P1896 " + url +
             " }} \n"
             "                WHERE {FILTER (NOT EXISTS {" + graph +
             "{ ?item wdt:P1896 ?url}})} ;")

        # add country
        if not helpers.is_bad(source[0]):
            country_code = self.get_country_code(source[0])
            if not helpers.is_bad(country_code):
                c = country_code
            else:
                c = helpers.clean(source[0])
            # NOTE(review): the value is wrapped in quotes, so a 'wd:...'
            # code is stored as a string literal rather than a prefixed
            # name - confirm that this is intended.
            q += insert_if_absent("wdt:P17", "'" + c + "'", "country")

        # Remaining mandatory columns, in output order.  The native-title
        # filter checks wdt:P1704 (the predicate being inserted); checking
        # wdt:P1448 instead would suppress the native title whenever a
        # regular title exists.
        columns = (
            (2, "wdt:P1448", "title", quoted_literal),
            (3, "wdt:P37", "lang", quoted_literal),
            (4, "wdt:P31", "type", quoted_literal),
            (5, "wdt:P1704", "title_native", quoted_literal),
            (6, "wnp:paywalled", "pw", quoted_literal),
            (7, "wnp:metasource", "ms", metasource_ref),
            (8, "wdt:P131", "state", quoted_literal),
            (10, "wnp:wikipedia-name", "wp_name", quoted_literal),
            (11, "wnp:redirect", "rd", quoted_literal),
            (12, "wnp:wikipedia-page", "wp_page", quoted_literal),
        )
        for index, predicate, var, to_object in columns:
            raw = source[index]
            if not helpers.is_bad(raw):
                q += insert_if_absent(predicate, to_object(raw), var)

        # add description; rows too short to carry this column are tolerated
        try:
            description = source[14]
        except IndexError:
            return q
        if not helpers.is_bad(description):
            q += insert_if_absent("wnp:description",
                                  quoted_literal(description), "desc")

        return q