Example no. 1
 def __init__(self, polygon_id, cache_delay=60):
     polygon_url = u"http://polygons.openstreetmap.fr/"
     # Hit index.py first, presumably so the server generates the polygon;
     # the response itself is discarded
     url = polygon_url + "index.py?id=" + str(polygon_id)
     s = downloader.urlread(url, cache_delay)
     # Then fetch the polygon as WKT
     url = polygon_url + "get_wkt.py?params=0&id=" + str(polygon_id)
     s = downloader.urlread(url, cache_delay)
     # Strip an optional EWKT "SRID=...;" prefix before parsing
     if s.startswith("SRID="):
         s = s.split(";", 1)[1]
     self.polygon = loads(s)
Example no. 2
 def __init__(self, polygon_id, cache_delay=60):
     polygon_url = u"http://polygons.openstreetmap.fr/"
     url = polygon_url + "index.py?id="+str(polygon_id)
     s = downloader.urlread(url, cache_delay)
     url = polygon_url + "get_wkt.py?params=0&id="+str(polygon_id)
     s = downloader.urlread(url, cache_delay)
     if s.startswith("SRID="):
         s = s.split(";", 1)[1]
     self.polygon = loads(s)
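Both variants first request index.py, apparently to let the server generate the polygon (that response is discarded), then parse the WKT returned by get_wkt.py. A minimal standalone sketch of the parsing step, assuming loads is shapely.wkt.loads, with an inline WKT string in place of the download:

from shapely.wkt import loads
from shapely.geometry import Point

# Stand-in for the string returned by get_wkt.py (may carry an EWKT prefix)
s = "SRID=4326;POLYGON((2.2 48.8, 2.5 48.8, 2.5 49.0, 2.2 49.0, 2.2 48.8))"
if s.startswith("SRID="):
    s = s.split(";", 1)[1]  # strip the EWKT SRID prefix, as above
polygon = loads(s)
print(polygon.contains(Point(2.35, 48.9)))  # True: inside the box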
Example no. 3
    def deprecated_list(self):
        wikiRoot = 'https://wiki.openstreetmap.org/wiki'
        data = urlread(wikiRoot + '/Template:Deprecated_features?action=raw',
                       1)

        # Tidy data up for processing
        # Eliminate wiki bold formatting
        data = data.replace("'''", "")

        # Remove HTML newlines
        data = re.sub(r'<br\s*/>', ' ', data)

        # Remove excess whitespace (also removes all newlines)
        data = " ".join(data.split())

        # Eliminate any whitespace around pipe characters
        # This makes reading the template parameters simple
        data = re.sub(r'\s?\|\s?', '|', data)

        # Eliminate templates to prevent unexpected pipe characters
        data = re.sub(r'{{{\s?lang\s?\|?\s?}}}', '', data, flags=re.I)
        # Tag template can take one or two params, with trailing | possible
        data = re.sub(r'{{(?:Tag|Key)\s?\|(.+?)\|?\s?}}',
                      lambda x: '`{}`'.format(
                          x.group(1).replace("||", "=").replace("|", "=")),
                      data,
                      flags=re.I)

        # Resolve interwiki links now
        data = re.sub(
            r'\[\[(.+?)\]\]',
            lambda x: '[{}]({}/{})'.format(x.group(1), wikiRoot,
                                           x.group(1).replace(" ", "_")), data)

        deprecated = {}
        for feature in data.split(r'{{Deprecated features/item')[1:]:
            # Unaccounted for template present in this feature
            if r'{{' in feature:
                continue

            src_key, src_val, dest = None, None, None
            for param in feature.split('|'):
                if '=' not in param:
                    continue

                k, v = param.split('=', 1)
                # k always starts with the parameter name: whitespace around |
                # was removed earlier, but a space may remain before the =
                # character, so startswith is used instead of ==
                if k.startswith('dkey'):
                    src_key = v
                elif k.startswith('dvalue'):
                    src_val = v
                elif k.startswith('suggestion'):
                    dest = v

            # Only record the entry if at least one parameter was parsed;
            # guards against template format changes
            if any((src_key, src_val, dest)):
                deprecated.setdefault(src_key, {})[src_val] = dest

        return deprecated
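The parameter split above only works because whitespace around pipes was normalized first. A small sketch of the same split-on-| logic, run on an inline template string instead of the live wiki page (sample values are illustrative):

import re

sample = "{{Deprecated features/item | dkey = highway | dvalue = ford | suggestion = ford=yes}}"
sample = re.sub(r'\s?\|\s?', '|', sample)  # normalize whitespace around pipes
feature = sample.split('{{Deprecated features/item')[1]
parsed = {}
for param in feature.split('|'):
    if '=' in param:
        k, v = param.split('=', 1)
        parsed[k.strip()] = v.rstrip('}').strip()
print(parsed)  # {'dkey': 'highway', 'dvalue': 'ford', 'suggestion': 'ford=yes'}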
Example no. 4
    def deprecated_list(self):
        data = urlread(u"https://wiki.openstreetmap.org/wiki/Template:Deprecated_features?action=raw&force_cache_20180805", 1)
        #data = open("Deprecated_features?action=raw").read()
        data = data.split("{{Deprecated features/item")
        dkey = re.compile(r"^\s*\|\s*dkey\s*=")
        dvalue = re.compile(r"\s*dvalue\s*=")
        suggestion = re.compile(r"^\s*\|\s*suggestion\s*=")
        dataMult = []
        for feature in data[1:]:
            deprecated_key = None
            deprecated_value = None
            deprecated_suggestion = None
            for line in feature.split("\n"):
                if dkey.match(line):
                    deprecated_key = line.split("|")[1].split("=")[1].strip()
                    t = line.split("|")
                    if len(t) > 2:
                        if dvalue.match(t[2]):
                            deprecated_value = t[2].split("=")[1].strip()

                if suggestion.match(line):
                    deprecated_suggestion = line.split("=")[1].strip()

            dataMult.append((deprecated_key, deprecated_value, deprecated_suggestion))

        deprecated = {}
        for line in dataMult:
            src_key = self.cleanWiki(line[0])
            src_val = self.cleanWiki(line[1])
            dest = self.cleanWiki(line[2])
            if src_key not in deprecated:
                deprecated[src_key] = {}
            deprecated[src_key][src_val] = dest
        return deprecated
Example no. 5
 def _get_brands(self):
     nsi_url_for_brands = "https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/nsi.json"
     json_str = urlread(nsi_url_for_brands, 30)
     results = json.loads(json_str)
     results = results['nsi']
     additional_brands = {}
     for tag, brands in results.items():
         if tag.startswith('brands/'):
             brand_nsi_name = tag[len('brands/'):]
             for brand in brands:
                 if "locationSet" in brand:
                     if "include" in brand["locationSet"] and self.country_code not in brand["locationSet"]["include"] and "001" not in brand["locationSet"]["include"]:
                         continue
                     if "exclude" in brand["locationSet"] and self.country_code in brand["locationSet"]["exclude"]:
                         continue
                 if "matchTags" in brand:
                     for additional_tag in brand["matchTags"]:
                         nsi_key = "{}|{}".format(additional_tag, brand["tags"]["name"])
                         additional_brands[nsi_key.lower()] = brand
                 if "matchNames" in brand:
                     for additional_name in brand["matchNames"]:
                         nsi_key = "{}|{}".format(brand_nsi_name, additional_name)
                         additional_brands[nsi_key.lower()] = brand
                 if "name" in brand["tags"]:
                     additional_brands["{}|{}".format(brand_nsi_name, brand["tags"]["name"]).lower()] = brand
                 additional_brands["{}|{}".format(brand_nsi_name, brand["displayName"]).lower()] = brand
     return additional_brands
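The returned mapping is keyed by lowercased "tag_path|name" strings. A hedged sketch of how a caller might probe it; the key scheme is inferred from how the keys are built above, and find_brand is a hypothetical helper:

additional_brands = {
    "amenity/fast_food|mcdonald's": {"tags": {"amenity": "fast_food", "name": "McDonald's"}},
}

def find_brand(nsi_path, name):
    # keys were lowercased on insertion, so lowercase the probe too
    return additional_brands.get("{}|{}".format(nsi_path, name).lower())

print(find_brand("amenity/fast_food", "McDonald's"))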
Example no. 6
    def deprecated_list(self):
        data = urlread(u"https://wiki.openstreetmap.org/wiki/Template:Deprecated_features?action=raw&force_cache_20180805", 1)
        #data = open("Deprecated_features?action=raw").read()
        data = data.split("{{Deprecated features/item")
        dkey = re.compile(r"^\s*\|\s*dkey\s*=")
        dvalue = re.compile(r"\s*dvalue\s*=")
        suggestion = re.compile(r"^\s*\|\s*suggestion\s*=")
        dataMult = []
        for feature in data[1:]:
            deprecated_key = None
            deprecated_value = None
            deprecated_suggestion = None
            for line in feature.split("\n"):
                if dkey.match(line):
                    deprecated_key = line.split("|")[1].split("=")[1].strip()
                    t = line.split("|")
                    if len(t) > 2:
                        if dvalue.match(t[2]):
                            deprecated_value = t[2].split("=")[1].strip()

                if suggestion.match(line):
                    deprecated_suggestion = line.split("=")[1].strip()

            dataMult.append((deprecated_key, deprecated_value, deprecated_suggestion))

        deprecated = {}
        for line in dataMult:
            src_key = self.cleanWiki(line[0])
            src_val = self.cleanWiki(line[1])
            dest = self.cleanWiki(line[2])
            if src_key not in deprecated:
                deprecated[src_key] = {}
            deprecated[src_key][src_val] = dest
        return deprecated
Example no. 7
 def _get_brands(self):
     nsi_url_for_brands = "https://raw.githubusercontent.com/osmlab/name-suggestion-index/master/dist/brands.json"
     json_str = urlread(nsi_url_for_brands, 30)
     results = json.loads(json_str)
     additional_brands = {}
     for brand_nsi_name, brand in results["brands"].items():
         if "locationSet" in brand:
             if "include" in brand[
                     "locationSet"] and self.country_code not in brand[
                         "locationSet"]["include"] and "001" not in brand[
                             "locationSet"]["include"]:
                 continue
             if "exclude" in brand[
                     "locationSet"] and self.country_code in brand[
                         "locationSet"]["exclude"]:
                 continue
         brand_nsi_name = brand_nsi_name.split("~")[0]
         if "matchTags" in brand:
             for additional_tag in brand["matchTags"]:
                 nsi_key = "{}|{}".format(additional_tag,
                                          brand_nsi_name.split("|")[1])
                 additional_brands[nsi_key.lower()] = brand
         if "matchNames" in brand:
             for additional_name in brand["matchNames"]:
                 nsi_key = "{}|{}".format(
                     brand_nsi_name.split("|")[0], additional_name)
                 additional_brands[nsi_key.lower()] = brand
         additional_brands[brand_nsi_name.lower()] = brand
     return additional_brands
Example no. 8
    def list_postcode(self):
        reline = re.compile("^[-CAN ]+$")
        # remline = re.compile("^[-CAN ]+ *\([-CAN ]+\)$")
        data = urlread(
            u"https://en.wikipedia.org/wiki/List_of_postal_codes?action=raw",
            1)
        data = filter(
            lambda t: len(t) > 2 and (t[1] != "- no codes -" or t[2] != ""),
            map(lambda x: list(map(lambda y: y.strip(), x.split("|")))[5:8],
                data.split("|-")[1:-1]))
        postcode = {}
        for line in data:
            iso = line[0][0:2]
            format_area = line[1]
            format_street = line[2]
            # note = line[3]

            postcode[iso] = {}
            if format_area != '':
                regex_area = self.parse_format(reline, format_area)
                if regex_area:
                    postcode[iso]['area'] = regex_area
            if format_street != '':
                regex_street = self.parse_format(reline, format_street)
                if regex_street:
                    postcode[iso]['street'] = regex_street
                else:
                    postcode[iso]['street'] = None

        return postcode
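parse_format is not shown in this excerpt. A plausible sketch, consistent with the older variant in Example no. 9 just below, where N stands for a digit, A for a letter and CC for the country code (the country argument here is an assumption):

import re

def parse_format(reline, fmt, country="FR"):  # default country is illustrative
    # only formats made of -, C, A, N and spaces are accepted
    if reline.match(fmt):
        return "^" + (fmt.replace("N", "[0-9]")
                         .replace("A", "[A-Z]")
                         .replace("CC", country)) + "$"

reline = re.compile(r"^[-CAN ]+$")
print(parse_format(reline, "NNNNN"))  # ^[0-9][0-9][0-9][0-9][0-9]$
print(parse_format(reline, "AN NA"))  # ^[A-Z][0-9] [0-9][A-Z]$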
Example no. 9
    def list_postcode(self):
        reline = re.compile("^[-CAN ]+$")
        remline = re.compile(r"^[-CAN ]+ *\([-CAN ]+\)$")
        data = urlread("http://en.wikipedia.org/wiki/List_of_postal_codes?action=raw", 1)
        data = filter(lambda t: len(t)>2 and t[1] != "- no codes -", map(lambda x: map(lambda y: y.strip(), x.split("|"))[5:8], data.split("|-")[1:-1]))
        postcode = {}
        for line in data:
            iso = line[0][0:2]
            format = line[1]
            note = line[2]

            if format[-1] == ')':
                format = map(lambda x: x.strip(), format[:-1].split('('))
            else:
                format = [format]

            regexs = []
            for f in format:
                if reline.match(f):
                    regex = f.replace("N", "[0-9]").replace("A", "[A-Z]").replace("CC", self.Country)
                    regexs.append(regex)

            if len(regexs) > 1:
                postcode[iso] = "^\("+("\)|\(".join(regexs))+"\)$"
            elif len(regexs) == 1:
                postcode[iso] = "^"+regexs[0]+"$"

        return postcode
Example no. 10
    def deprecated_list(self):
        data = urlread("http://wiki.openstreetmap.org/wiki/Template:Deprecated_features?action=raw", 1)
        #data = open("Deprecated_features?action=raw").read()
        data = data.split("{{Deprecated_features/item")
        dataMult = []
        for feature in data[1:]:
            deprecated_key = None
            deprecated_value = None
            deprecated_suggestion = None
            for line in feature.split("\n"):
                if line.startswith("| dkey=") or line.startswith("| dkey ="):
                    deprecated_key = line.split("|")[1].split("=")[1]
                    t = line.split("|")
                    if len(t) > 2:
                        t = t[2].strip()
                        if t.startswith("dvalue=") or t.startswith("dvalue ="):
                            deprecated_value = t.split("=")[1]

                if line.startswith("| suggestion=") or line.startswith("| suggestion ="):
                    deprecated_suggestion = line.split("=")[1]

            dataMult.append((deprecated_key, deprecated_value, deprecated_suggestion))

        deprecated = {}
        for line in dataMult:
            src_key = self.cleanWiki(line[0])
            src_val = self.cleanWiki(line[1])
            dest = self.cleanWiki(line[2])
            if src_key not in deprecated:
                deprecated[src_key] = {}
            deprecated[src_key][src_val] = dest
        return deprecated
Example no. 11
    def black_list(self):
        # Percent-encoded SPARQL: items that are instances or subclasses
        # (wdt:P31/wdt:P279*) of Q507619 or Q1631129
        wikidata_query_for_chain_store = u"https://query.wikidata.org/sparql?query=SELECT%20DISTINCT%20%3Fitem%20%3FitemLabel%20WHERE%20{%0A%20{%20%3Fitem(wdt%3AP31%2Fwdt%3AP279*)wd%3AQ507619%20}%20UNION%20{%20%3Fitem(wdt%3AP31%2Fwdt%3AP279*)%20wd%3AQ1631129%20}%0A%20SERVICE%20wikibase%3Alabel%20{%20bd%3AserviceParam%20wikibase%3Alanguage%20%22[AUTO_LANGUAGE]%2Cen%22.%20}%0A}&format=json"

        json_str = urlread(wikidata_query_for_chain_store, 30)
        results = json.loads(json_str)
        should_be_brand = [elem['item']['value'].split('/')[-1] for elem in results['results']['bindings']]

        return should_be_brand
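A hedged sketch of the JSON shape this code expects back from the Wikidata SPARQL endpoint; the entity in the binding is illustrative only:

results = {"results": {"bindings": [
    {"item": {"value": "http://www.wikidata.org/entity/Q38076"},
     "itemLabel": {"value": "McDonald's"}},  # illustrative binding
]}}
should_be_brand = [elem['item']['value'].split('/')[-1]
                   for elem in results['results']['bindings']]
print(should_be_brand)  # ['Q38076']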
Example no. 12
def load_poly(poly):
    try:
        print poly
        s = downloader.urlread(poly, 1)
        return parse_poly(s.split("\n"))
    except IOError as e:
        print e
        return
Example no. 13
def load_poly(poly):
    try:
        print(poly)
        s = downloader.urlread(poly, 1)
        return parse_poly(s.split('\n'))
    except IOError as e:
        print(e)
        return
Example no. 14
def load_poly(poly):
    try:
        print(poly)
        s = downloader.urlread(poly, 1)
        return parse_poly(s.split('\n'))
    except IOError as e:
        print(e)
        return
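parse_poly is not shown in these excerpts. A minimal sketch for the Osmosis polygon-file format it presumably consumes (a name line, then sections of lon/lat pairs, each section and the file closed by END); parse_poly_sketch is a hypothetical stand-in:

def parse_poly_sketch(lines):
    coords = []
    for line in lines[1:]:          # first line is the polygon name
        parts = line.split()
        if len(parts) == 2:         # skip section headers and END markers
            coords.append((float(parts[0]), float(parts[1])))
    return coords

sample = ["test_area", "1", "   2.2  48.8", "   2.5  49.0", "END", "END"]
print(parse_poly_sketch(sample))    # [(2.2, 48.8), (2.5, 49.0)]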
Example no. 15
    def black_list(self):
        wikidata_query_for_chain_store = u"https://query.wikidata.org/sparql?query=SELECT%20DISTINCT%20%3Fitem%20%3FitemLabel%20WHERE%20{%0A%20{%20%3Fitem(wdt%3AP31%2Fwdt%3AP279*)wd%3AQ507619%20}%20UNION%20{%20%3Fitem(wdt%3AP31%2Fwdt%3AP279*)%20wd%3AQ1631129%20}%0A%20SERVICE%20wikibase%3Alabel%20{%20bd%3AserviceParam%20wikibase%3Alanguage%20%22[AUTO_LANGUAGE]%2Cen%22.%20}%0A}&format=json"

        json_str = urlread(wikidata_query_for_chain_store, 30)
        results = json.loads(json_str)
        should_be_brand = [elem['item']['value'].split('/')[-1] for elem in results['results']['bindings']]

        return should_be_brand
Example no. 16
 def liste_des_arbres_fruitiers(self):
     reline = re.compile(r"\[\[([^:]*)$")
     data = urlread(u"https://fr.wikipedia.org/wiki/Liste_des_arbres_fruitiers?action=raw", 1)
     #data = open(u"Liste_des_arbres_fruitiers?action=raw").read()
     data = data.split("]]")
     for line in data:
         for res in reline.findall(line):
             for n in res.split('|'):
                 self.Tree[self.normalize(n)] = {'species:fr':res}
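A quick check of what the regex above captures, on an illustrative line of wiki markup rather than the live article:

import re

reline = re.compile(r"\[\[([^:]*)$")
data = "* [[Pommier|Pomme]], [[Poirier]]".split("]]")
for line in data:
    for res in reline.findall(line):
        for n in res.split('|'):
            print(n, "->", res)
# Pommier -> Pommier|Pomme
# Pomme -> Pommier|Pomme
# Poirier -> Poirier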
Example no. 17
 def liste_des_essences_europennes(self):
     reline = re.compile(r"^\* \[\[([^]]*)\]\][^[]*\[\[([^]]*)\]\][^[]*(?:\[\[([^]]*)\]\][^[]*)?(?:\[\[([^]]*)\]\][^[]*)?")
     data = urlread(u"https://fr.wikipedia.org/wiki/Liste_des_essences_forestières_européennes?action=raw", 1)
     #data = open(u"Liste_des_essences_forestières_européennes?action=raw").read()
     data = data.split("\n")
     for line in data:
         for res in reline.findall(line):
             for n in res[0].split('|'):
                 self.Tree[self.normalize(n)] = {'genus':res[1], 'species':'|'.join(res[2:3]), 'species:fr':res[0]}
Example no. 18
 def liste_des_arbres_fruitiers(self):
     reline = re.compile(r"\[\[([^:]*)$")
     data = urlread(u"https://fr.wikipedia.org/wiki/Liste_des_arbres_fruitiers?action=raw", 1)
     #data = open(u"Liste_des_arbres_fruitiers?action=raw").read()
     data = data.split("]]")
     for line in data:
         for res in reline.findall(line):
             for n in res.split('|'):
                 self.Tree[self.normalize(n)] = {'species:fr':res}
Example no. 19
 def liste_des_essences_europennes(self):
     reline = re.compile(r"^\* \[\[([^]]*)\]\][^[]*\[\[([^]]*)\]\][^[]*(?:\[\[([^]]*)\]\][^[]*)?(?:\[\[([^]]*)\]\][^[]*)?")
     data = urlread(u"https://fr.wikipedia.org/wiki/Liste_des_essences_forestières_européennes?action=raw", 1)
     #data = open(u"Liste_des_essences_forestières_européennes?action=raw").read()
     data = data.split("\n")
     for line in data:
         for res in reline.findall(line):
             for n in res[0].split('|'):
                 self.Tree[self.normalize(n)] = {'genus':res[1], 'species':'|'.join(res[2:3]), 'species:fr':res[0]}
Example no. 20
    def init(self, logger):
        Plugin.init(self, logger)

        country = self.father.config.options.get("country") if self.father else None
        language = self.father.config.options.get("language") if self.father else None
        if isinstance(language, list):
            language = None
        elif language:
            language = language.split('_')[0]

        self._update_ks = {}
        self._update_kr = {}
        self._update_ks_vs = defaultdict(dict)
        self._update_kr_vs = defaultdict(dict)
        self._update_ks_vr = defaultdict(dict)
        self._update_kr_vr = defaultdict(dict)

        reline = re.compile(r"^\|([^|]*)\|\|([^|]*)\|\|([^|]*)\|\|([^|]*).*")

        # fetch the data from https://wiki.openstreetmap.org/index.php?title=User:FrViPofm/TagwatchCleaner
        data = urlread(u"https://wiki.openstreetmap.org/index.php?title=User:FrViPofm/TagwatchCleaner&action=raw", 1)
        data = data.split("\n")
        for line in data:
            for res in reline.findall(line):
                only_for = res[3].strip()
                if only_for in (None, '', country, language) or (country and country.startswith(only_for)):
                    r = res[1].strip()
                    c0 = res[2].strip()
                    tags = ["fix:chair"] if c0 == "" else [c0, "fix:chair"]
                    c = stablehash(c0)
                    self.errors[c] = self.def_class(item = 3030, level = 2, tags = tags,
                        title = {'en': c0},
                        detail = T_(
'''Simple and frequent errors, the list is available
[here](https://wiki.openstreetmap.org/wiki/User:FrViPofm/TagwatchCleaner).'''))
                    if u"=" in res[0]:
                        k = res[0].split(u"=")[0].strip()
                        v = res[0].split(u"=")[1].strip()
                        if self.quoted(k):
                            k = self.quoted2re(k)
                            if self.quoted(v):
                                self._update_kr_vr[k][self.quoted2re(v)] = [r, c]
                            else:
                                self._update_kr_vs[k][v] = [r, c]
                        else:
                            if self.quoted(v):
                                self._update_ks_vr[k][self.quoted2re(v)] = [r, c]
                            else:
                                self._update_ks_vs[k][v] = [r, c]
                    else:
                        if self.quoted(res[0]):
                            self._update_kr[self.quoted2re(res[0])] = [r, c]
                        else:
                            self._update_ks[res[0]] = [r, c]
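quoted() and quoted2re() are not shown in this excerpt. A plausible sketch consistent with their use above; the double-quote convention for marking regular expressions in the wiki table is a guess:

import re

def quoted(s):
    # assumed convention: values wrapped in double quotes are regexes
    return len(s) > 1 and s.startswith('"') and s.endswith('"')

def quoted2re(s):
    return re.compile("^" + s[1:-1] + "$")

print(quoted('"maxspeed:.*"'))                                     # True
print(bool(quoted2re('"maxspeed:.*"').match("maxspeed:forward")))  # True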
Example no. 21
    def init(self, logger):
        Plugin.init(self, logger)

        country = self.father.config.options.get("country") if self.father else None
        language = self.father.config.options.get("language") if self.father else None
        if not isinstance(language, basestring):
            language = None

        self._update_ks = {}
        self._update_kr = {}
        self._update_ks_vs = defaultdict(dict)
        self._update_kr_vs = defaultdict(dict)
        self._update_ks_vr = defaultdict(dict)
        self._update_kr_vr = defaultdict(dict)

        reline = re.compile(r"^\|([^|]*)\|\|([^|]*)\|\|([^|]*)\|\|([^|]*).*")

        # fetch the data from http://wiki.openstreetmap.org/index.php?title=User:FrViPofm/TagwatchCleaner
        data = urlread("http://wiki.openstreetmap.org/index.php?title=User:FrViPofm/TagwatchCleaner&action=raw", 1)
        data = data.split("\n")
        for line in data:
            for res in reline.findall(line):
                only_for = res[3].strip()
                if only_for in (None, '', country, language):
                    r = res[1].strip()
                    c0 = res[2].strip()
                    tags = ["fix:chair"] if c0 == "" else [c0, "fix:chair"]
                    c = self.stablehash(c0.encode("utf8"))
                    self.errors[c] = { "item": 3030, "level": 2, "tag": tags, "desc": {"en": c0} }
                    if u"=" in res[0]:
                        k = res[0].split(u"=")[0].strip()
                        v = res[0].split(u"=")[1].strip()
                        if self.quoted(k):
                            k = self.quoted2re(k)
                            if self.quoted(v):
                                self._update_kr_vr[k][self.quoted2re(v)] = [r, c]
                            else:
                                self._update_kr_vs[k][v] = [r, c]
                        else:
                            if self.quoted(v):
                                self._update_ks_vr[k][self.quoted2re(v)] = [r, c]
                            else:
                                self._update_ks_vs[k][v] = [r, c]
                    else:
                        if self.quoted(res[0]):
                            self._update_kr[self.quoted2re(res[0])] = [r, c]
                        else:
                            self._update_ks[res[0]] = [r, c]
Example no. 22
    def list_postcode(self):
        reline = re.compile("^[-CAN ]+$")
        # remline = re.compile("^[-CAN ]+ *\([-CAN ]+\)$")
        data = urlread("http://en.wikipedia.org/wiki/List_of_postal_codes?action=raw", 1)
        data = filter(lambda t: len(t)>2 and t[1] != "- no codes -", map(lambda x: map(lambda y: y.strip(), x.split("|"))[5:8], data.split("|-")[1:-1]))
        postcode = {}
        for line in data:
            iso = line[0][0:2]
            format_area = line[1]
            format_street = line[2]
            # note = line[3]

            postcode[iso] = {}
            if format_area != '':
                postcode[iso]['area'] = self.parse_format(reline, format_area)
            if format_street != '':
                postcode[iso]['street'] = self.parse_format(reline, format_street)

        return postcode
Example no. 23
 def deprecated_list(self):
     data = urlread("http://wiki.openstreetmap.org/wiki/Deprecated_features?action=raw", 1)
     #data = open("Deprecated_features?action=raw").read()
     data = data[:data.index('\n|}\n')].split("|-")
     dataMult = []
     for line in data[2:]:
         item = line[2:].split(" || ")
         ss = item[1].replace('<br />', '<br/>').split('<br/>')
         for s in ss:
             dataMult.append([s, item[3]])
     deprecated = {}
     for line in dataMult:
         src = self.cleanWiki(line[0])
         dest = self.cleanWiki(line[1])
         s = src.split('=')
         if s[0] not in deprecated:
             deprecated[s[0]] = {}
         if len(s) == 2:
             deprecated[s[0]][s[1]] = dest
         else:
             deprecated[s[0]][None] = dest
     return deprecated
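cleanWiki is not shown in any of these excerpts. A minimal sketch of what it plausibly does, stripping common wiki markup such as {{Tag|k|v}} templates and [[links]]; the exact rules are an assumption, and cleanWiki_sketch is a hypothetical stand-in:

import re

def cleanWiki_sketch(src):
    if not src:
        return None
    # {{Tag|k|v}} / {{Key|k}} -> k=v / k
    src = re.sub(r'\{\{(?:Tag|Key)\|([^|}]*)(?:\|([^}]*))?\}\}',
                 lambda m: m.group(1) + ('=' + m.group(2) if m.group(2) else ''),
                 src)
    # [[target|label]] / [[target]] -> label / target
    src = re.sub(r'\[\[(?:[^|\]]*\|)?([^\]]*)\]\]', r'\1', src)
    return src.strip()

print(cleanWiki_sketch("{{Tag|highway|ford}}, see [[Key:ford|ford]]"))
# highway=ford, see ford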
Example no. 24
 def _get_frequent_names(self):
     nsi_url_for_names = "https://raw.githubusercontent.com/osmlab/name-suggestion-index/master/dist/names_keep.json"
     json_str = urlread(nsi_url_for_names, 30)
     results = json.loads(json_str)
     return {elem.lower() for elem in results.keys()}
Example no. 25
 def _download_nsi(self):
     nsi_url = "https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/nsi.json"
     json_str = urlread(nsi_url, 30)
     results = json.loads(json_str)
     return results['nsi']
Example no. 26
    def analyse(self, tags, wikipediaTag="wikipedia"):
        err = []
        if wikipediaTag in tags:
            m = self.wiki_regexp.match(tags[wikipediaTag])
            if (tags[wikipediaTag].startswith("http://") or tags[wikipediaTag].startswith("https://")) and not m:
                # tag 'wikipedia' starts with 'http://' but is not a Wikipedia URL
                return [{"class": 30310, "subclass": 0}]
            elif m:
                # tag 'wikipedia' seems to be a URL
                return [{"class": 30311, "subclass": 1,
                         "text": T_(u"Use wikipedia=%s:*", m.group(2)),
                         "fix": {wikipediaTag: "%s:%s" % (m.group(2), self.human_readable(m.group(3)))} }]

            if not self.lang_regexp.match(tags[wikipediaTag]):
                err.append({"class": 30312, "subclass": 2})
            else:
                prefix = tags[wikipediaTag].split(':', 1)[0]
                tag = wikipediaTag+':'+prefix
                if tag in tags:
                    err.append({"class": 30316, "subclass": 6, "fix": {'-': [tag]}})
            if "%" in tags[wikipediaTag] or "_" in tags[wikipediaTag]:
                err.append({"class": 30313, "subclass": 3, "fix": {wikipediaTag: self.human_readable(tags[wikipediaTag])}} )

        interwiki = False
        missing_primary = []
        for tag in [t for t in tags if t.startswith(wikipediaTag+":")]:
            suffix = tag[len(wikipediaTag)+1:]
            if ":" in suffix:
                suffix = suffix.split(":")[0]

            if wikipediaTag in tags:
                if interwiki is False:
                    try:
                        lang, title = tags[wikipediaTag].split(':')
                        json_str = urlread("http://"+lang+".wikipedia.org/w/api.php?action=query&prop=langlinks&titles="+urllib.quote(title.encode('utf-8'))+"&redirects=&lllimit=500&format=json" , 30)
                        interwiki = json.loads(json_str)
                        interwiki = dict(map(lambda x: [x["lang"], x["*"]], interwiki["query"]["pages"].values()[0]["langlinks"]))
                    except:
                        interwiki = None

                    if interwiki and suffix in interwiki and interwiki[suffix] == self.human_readable(tags[tag]):
                        err.append({"class": 30317, "subclass": 7, "fix": [
                            {'-': [tag]},
                            {'-': [tag], '~': {wikipediaTag: suffix+':'+interwiki[suffix]}}
                        ]})

            if suffix in tags:
                # wikipedia:xxxx is only authorized if the tag xxxx exists
                err.extend(self.analyse(tags, wikipediaTag+":"+suffix))

            elif self.lang_restriction_regexp.match(suffix):
                if not wikipediaTag in tags:
                    m = self.wiki_regexp.match(tags[tag])
                    if m:
                        value = self.human_readable(m.group(3))
                    elif tags[tag].startswith(suffix+":"):
                        value = tags[tag][len(suffix)+1:]
                    else:
                        value = self.human_readable(tags[tag])
                    missing_primary.append({'-': [tag], '+':{wikipediaTag: "%s:%s" % (suffix, value)}})
            else:
                err.append({"class": 30315, "subclass": 5,
                            "text": T_(u"Invalid wikipedia suffix '%s'", suffix) })

        if missing_primary != []:
            if self.Language:
                missing_primary = sorted(missing_primary, key=lambda x: x['+'][wikipediaTag][0:2] if x['+'][wikipediaTag][0:2] != self.Language else '')
            err.append({"class": 30314, "subclass": 4, "fix": missing_primary})

        return err
Example no. 27
    def analyse(self, tags, wikipediaTag="wikipedia"):
        err = []
        if wikipediaTag in tags:
            m = self.wiki_regexp.match(tags[wikipediaTag])
            if (tags[wikipediaTag].startswith(u"http://")
                    or tags[wikipediaTag].startswith(u"https://")) and not m:
                # tag 'wikipedia' starts with 'http://' but is not a Wikipedia URL
                return [{"class": 30310, "subclass": 0}]
            elif m:
                # tag 'wikipedia' seems to be a URL
                return [{
                    "class": 30311,
                    "subclass": 1,
                    "text": T_("Use wikipedia={0}:*", m.group(2)),
                    "fix": {
                        wikipediaTag:
                        "{0}:{1}".format(m.group(2),
                                         self.human_readable(m.group(3)))
                    }
                }]

            if not self.lang_regexp.match(tags[wikipediaTag]):
                err.append({"class": 30312, "subclass": 2})
            else:
                prefix = tags[wikipediaTag].split(':', 1)[0]
                tag = wikipediaTag + ':' + prefix
                if tag in tags:
                    err.append({
                        "class": 30316,
                        "subclass": 6,
                        "fix": {
                            '-': [tag]
                        }
                    })
            if "%" in tags[wikipediaTag] or "_" in tags[wikipediaTag]:
                err.append({
                    "class": 30313,
                    "subclass": 3,
                    "fix": {
                        wikipediaTag: self.human_readable(tags[wikipediaTag])
                    }
                })

        interwiki = False
        missing_primary = []
        for tag in [t for t in tags if t.startswith(wikipediaTag + ":")]:
            suffix = tag[len(wikipediaTag) + 1:]
            if ":" in suffix:
                suffix = suffix.split(":")[0]

            # In Ukraine wikipedia=uk:X + wikipedia:ru=Y are allowed
            if self.Country and self.Country.startswith("UA") and suffix == "ru":
                continue

            if wikipediaTag in tags:
                if interwiki is False:
                    try:
                        lang, title = tags[wikipediaTag].split(':')
                        json_str = urlread(
                            u"https://" + lang + u".wikipedia.org/w/api.php"
                            u"?action=query&prop=langlinks&titles=" + title +
                            u"&redirects=&lllimit=500&format=json", 30)
                        interwiki = json.loads(json_str)
                        interwiki = dict(map(
                            lambda x: [x["lang"], x["*"]],
                            list(interwiki["query"]["pages"].values())[0]["langlinks"]))
                    except:
                        interwiki = None

                if (interwiki and suffix in interwiki
                        and interwiki[suffix] == self.human_readable(tags[tag])):
                    err.append({
                        "class": 30317,
                        "subclass": stablehash64(tag),
                        "fix": [
                            {'-': [tag]},
                            {'-': [tag], '~': {wikipediaTag: suffix + ':' + interwiki[suffix]}},
                        ]
                    })

            if suffix in tags:
                # wikipedia:xxxx is only authorized if the tag xxxx exists
                err.extend(self.analyse(tags, wikipediaTag + ":" + suffix))

            elif self.lang_restriction_regexp.match(suffix):
                if not wikipediaTag in tags:
                    m = self.wiki_regexp.match(tags[tag])
                    if m:
                        value = self.human_readable(m.group(3))
                    elif tags[tag].startswith(suffix + ":"):
                        value = tags[tag][len(suffix) + 1:]
                    else:
                        value = self.human_readable(tags[tag])
                    missing_primary.append({
                        '-': [tag],
                        '+': {
                            wikipediaTag: "{0}:{1}".format(suffix, value)
                        }
                    })
            else:
                err.append({
                    "class": 30315,
                    "subclass": stablehash64(tag),
                    "text": T_("Invalid wikipedia suffix '{0}'", suffix)
                })

        if missing_primary != []:
            if self.Language:
                missing_primary = sorted(
                    missing_primary,
                    key=lambda x: x['+'][wikipediaTag][0:2]
                    if x['+'][wikipediaTag][0:2] != self.Language.split('_')[0]
                    else '')
            err.append({"class": 30314, "subclass": 4, "fix": missing_primary})

        return err
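For reference, a hedged sketch of the MediaWiki langlinks response shape that both analyse() variants unpack into the interwiki dict; the page id and titles are illustrative:

response = {"query": {"pages": {"123": {"langlinks": [
    {"lang": "fr", "*": "Paris"},
    {"lang": "de", "*": "Paris"},
]}}}}
interwiki = dict(map(lambda x: [x["lang"], x["*"]],
                     list(response["query"]["pages"].values())[0]["langlinks"]))
print(interwiki)  # {'fr': 'Paris', 'de': 'Paris'}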