예제 #1
0
def patch_wikip_country():
    """Align WIKIP_COUNTRY names with WIKIP_ISO3166 and check the prefixes.

    For every (country, prefix list) entry of the module-level WIKIP_COUNTRY:

    * the entry is re-keyed when the country is listed in COUNTRY_RENAME, or
      when country_present() matches it against the 'nameset' of one of the
      WIKIP_ISO3166 records;
    * it is left as is when the name is a WIKIP_ISO3166 country name or a
      SUBTERR_TO_COUNTRY key;
    * otherwise the name is reported on stdout as not found.

    Each prefix missing from WIKIP_MSISDN is then reported, together with
    every shorter leading part of it that is listed, or as plainly missing
    when no leading part is listed.

    Returns None: the function mutates WIKIP_COUNTRY in place and prints a
    log. Raises AssertionError when a prefix is not made of digits only.
    """
    print('[+] patch Wikipedia list of country prefixes: WIKIP_COUNTRY')
    #
    isonameset = {r['country_name'] for r in WIKIP_ISO3166.values()}
    # ISO3166 records in name order, used by the name-variant fallback lookup
    # (loop-invariant, hence computed once)
    isorecords = sorted(WIKIP_ISO3166.values(),
                        key=lambda r: r['country_name'])
    #
    # iterate over a sorted snapshot, as entries get re-keyed within the loop
    for country, preflist in sorted(WIKIP_COUNTRY.items()):
        assert (all([pref.isdigit() for pref in preflist]))
        #
        found = False
        #
        if country in COUNTRY_RENAME:
            newname = COUNTRY_RENAME[country]
            WIKIP_COUNTRY[newname] = preflist
            del WIKIP_COUNTRY[country]
            print('> country name changed from %s to %s' % (country, newname))
            country = newname
            found = True
        #
        elif country in isonameset:
            found = True
        #
        elif country in SUBTERR_TO_COUNTRY:
            found = True
        #
        else:
            # fall back on the name variants of each ISO3166 record,
            # the first matching record wins
            for r in isorecords:
                if country_present(country, r['nameset']):
                    newname = r['country_name']
                    WIKIP_COUNTRY[newname] = preflist
                    del WIKIP_COUNTRY[country]
                    print('> country name changed from %s to %s' %
                          (country, newname))
                    country = newname
                    found = True
                    break
        #
        if not found:
            print('> country name %s, prefix %s, not found in WIKIP_ISO3166'\
                  % (country, ', '.join(['+%s' % pref for pref in preflist])))
        #
        for pref in preflist:
            if pref in WIKIP_MSISDN:
                continue
            # must start as False: it used to be initialised to True, which
            # made the "not in WIKIP_MSISDN" report below unreachable
            found = False
            # try every shorter leading part, longest first
            for i in range(len(pref) - 1, 0, -1):
                if pref[:i] in WIKIP_MSISDN:
                    if i == 1:
                        print('> country %s, prefix +%s not in WIKIP_MSISDN, corresponds to +%s'\
                              % (country, pref, pref[:i]))
                    else:
                        print('> country %s, prefix +%s not in WIKIP_MSISDN, but +%s corresponds to %s'\
                              % (country, pref, pref[:i],
                                 ', '.join(['%s (%s)' % (r[1], r[0]) for r in WIKIP_MSISDN[pref[:i]]])))
                    found = True
            if not found:
                print('> country %s, prefix +%s not in WIKIP_MSISDN' %
                      (country, pref))
예제 #2
0
def patch_txtn_mnc():
    """Clean up CSV_TXTN_MCCMNC keys and align its country names.

    Entries of the module-level CSV_TXTN_MCCMNC whose key is not made of
    digits only are deleted. For the remaining entries, the value is either a
    single record or a list of records; the first item of each record is
    taken as the country name and the second one is carried over unchanged.
    A country name which is not a WIKIP_ISO3166 country name is passed to
    _patch_country_name(); when that returns a non-empty name, the record is
    replaced by the tuple (new name, second item).

    Returns None: the function mutates CSV_TXTN_MCCMNC in place and prints a
    log.
    """
    print('[+] patch txtNation list of MCC-MNC: CSV_TXTN_MCCMNC')
    #
    isonameset = {r['country_name'] for r in WIKIP_ISO3166.values()}
    #
    # iterate over a sorted snapshot, as entries get deleted within the loop
    for mccmnc, inf in sorted(CSV_TXTN_MCCMNC.items()):
        if not mccmnc.isdigit():
            print('> deleting entry for %s' % mccmnc)
            del CSV_TXTN_MCCMNC[mccmnc]
            continue
        #
        if isinstance(inf, list):
            # several records for this MCC-MNC: replace by position, instead
            # of searching the list (index / del / insert) while iterating it
            for i, entry in enumerate(inf):
                if entry[0] not in isonameset:
                    newname = _patch_country_name(entry[0])
                    if newname:
                        inf[i] = (newname, entry[1])
        elif inf[0] not in isonameset:
            newname = _patch_country_name(inf[0])
            if newname:
                CSV_TXTN_MCCMNC[mccmnc] = (newname, inf[1])
예제 #3
0
def patch_egal_min_dist():
    """Rename countries in CSV_EGAL_MIN_DIST and report unknown names.

    CSV_EGAL_MIN_DIST is handled as a dict of dicts, source country ->
    {destination country -> value}. Both levels of keys are re-keyed
    according to COUNTRY_RENAME. Afterwards, a line is printed for every
    destination which is not also a source, and for every source which is not
    a WIKIP_ISO3166 country name (telling whether it is at least a
    SUBTERR_TO_COUNTRY key).

    Returns None: the function mutates CSV_EGAL_MIN_DIST in place and prints
    a log.
    """
    print('[+] patch Egallic country distance dataset: CSV_EGAL_MIN_DIST')
    #
    isonameset = set(rec['country_name'] for rec in WIKIP_ISO3166.values())
    #
    # 1) rename some countries
    # 1.1) destination names, within each per-source dict
    for src in sorted(CSV_EGAL_MIN_DIST):
        distances = CSV_EGAL_MIN_DIST[src]
        # sorted() takes a snapshot of the keys, the dict changes in the loop
        for dst in sorted(distances):
            if dst not in COUNTRY_RENAME:
                continue
            distances[COUNTRY_RENAME[dst]] = distances[dst]
            del distances[dst]
    #
    # 1.2) source names
    for src in sorted(CSV_EGAL_MIN_DIST):
        if src not in COUNTRY_RENAME:
            continue
        CSV_EGAL_MIN_DIST[COUNTRY_RENAME[src]] = CSV_EGAL_MIN_DIST[src]
        del CSV_EGAL_MIN_DIST[src]
    #
    # 2) do some verifications
    for src in sorted(CSV_EGAL_MIN_DIST):
        for dst in CSV_EGAL_MIN_DIST[src]:
            if dst not in CSV_EGAL_MIN_DIST:
                print('>>> dst country %s in %s, not in src' % (dst, src))
        if src in isonameset:
            continue
        if src in SUBTERR_TO_COUNTRY:
            print('> country %s, matching only a sub-territory name' % src)
        else:
            print('>>> country %s, not matching any territory name' % src)
예제 #4
0
def patch_wfb():
    """Normalize the World Factbook dataset WORLD_FB.

    Three passes over the module-level WORLD_FB dict (name -> infos):

    1. '-' placeholders in the 'cc2', 'cc3', 'ccn' and 'gec' fields become
       empty strings, and entries listed in WFB_COUNTRY_DEL are deleted;
    2. entries and border names listed in COUNTRY_RENAME are re-keyed;
    3. names which are not WIKIP_ISO3166 country names are either checked
       against their parent in SUBTERR_TO_COUNTRY / COUNTRY_SPEC, re-keyed to
       the WIKIP_ISO3166 name found through their CC2, or reported.

    Returns None: the function mutates WORLD_FB in place and prints a log.
    Raises AssertionError when a rename would overwrite an existing entry.
    """
    print('[+] patch the World Factbook dataset: WORLD_FB')
    #
    isonameset = set(rec['country_name'] for rec in WIKIP_ISO3166.values())
    #
    # pass 1: blank out the '-' placeholders, drop unwanted entries
    # (every pass iterates over a sorted snapshot, as WORLD_FB changes)
    for name, infos in sorted(WORLD_FB.items()):
        for field in ('cc2', 'cc3', 'ccn', 'gec'):
            if infos[field] == '-':
                infos[field] = ''
        if name in WFB_COUNTRY_DEL:
            del WORLD_FB[name]
            print('> country %s deleted' % name)
    #
    # pass 2: apply COUNTRY_RENAME to entry names and to border names
    for name, infos in sorted(WORLD_FB.items()):
        if name in COUNTRY_RENAME:
            newname = COUNTRY_RENAME[name]
            assert (newname not in WORLD_FB)
            WORLD_FB[newname] = infos
            del WORLD_FB[name]
            print('> country name changed from %s to %s' % (name, newname))
        # patch borders' names too
        try:
            borders = infos['infos']['boundaries']['bord']
        except Exception:
            # best effort: nothing to patch when the borders are not there
            continue
        for neighbour, value in list(borders.items()):
            if neighbour in COUNTRY_RENAME:
                borders[COUNTRY_RENAME[neighbour]] = value
                del borders[neighbour]
    #
    # pass 3: deal with names unknown to WIKIP_ISO3166
    for name, infos in sorted(WORLD_FB.items()):
        if name in isonameset:
            continue
        cc2 = infos['cc2']
        if name in SUBTERR_TO_COUNTRY:
            # sub-territory: only its CC2, when set, is checked against the
            # one of its parent country
            country = SUBTERR_TO_COUNTRY[name]
            if cc2 and cc2 != COUNTRY_SPEC[country]['cc2']:
                print('> country %s, exists as sub-territory, CC2 mismatch %s / %s'\
                      % (name, cc2, COUNTRY_SPEC[country]['cc2']))
        elif cc2:
            # CC2 lookup
            if cc2 in WIKIP_ISO3166:
                newname = WIKIP_ISO3166[cc2]['country_name']
                assert (newname not in WORLD_FB)
                WORLD_FB[newname] = infos
                del WORLD_FB[name]
                print('> country name changed from %s to %s' %
                      (name, newname))
            else:
                print('> country %s, CC2 %s, not in WIKIP_ISO3166' %
                      (name, cc2))
        elif name not in WFB_UNINHABITED:
            print('> country %s, no CC2, not referenced, unknown' %
                  name)
예제 #5
0
def patch_itut_sanc(sanclist):
    """Align the country names of an ITU-T SANC mapping with WIKIP_ISO3166.

    sanclist is handled as a dict, SANC -> country name. Every country name
    which is not a WIKIP_ISO3166 country name is passed to
    _patch_country_name(); when that returns a non-empty name, the value is
    replaced by it.

    Returns None: sanclist is modified in place.
    """
    print('[+] patch ITU-T list of SANC: %r' % id(sanclist))
    #
    known = set(rec['country_name'] for rec in WIKIP_ISO3166.values())
    for sanc, country in list(sanclist.items()):
        if country in known:
            continue
        patched = _patch_country_name(country)
        if patched:
            sanclist[sanc] = patched
예제 #6
0
def patch_itut_spc(spclist):
    """Align the country names of an ITU-T SPC mapping with WIKIP_ISO3166.

    spclist is handled as a dict keyed by country name. Every key which is
    not a WIKIP_ISO3166 country name is passed to _patch_country_name(); when
    that returns a non-empty name, the entry is moved under that name.

    Returns None: spclist is modified in place.
    """
    print('[+] patch ITU-T list of SPC: %r' % id(spclist))
    #
    known = set(rec['country_name'] for rec in WIKIP_ISO3166.values())
    # iterate over a snapshot, as entries get re-keyed within the loop
    for country, spcs in list(spclist.items()):
        if country in known:
            continue
        patched = _patch_country_name(country)
        if not patched:
            continue
        del spclist[country]
        spclist[patched] = spcs
예제 #7
0
def patch_wikip_mnc():
    """Normalize the Wikipedia MCC-MNC records of WIKIP_MNC.

    For every record of the module-level WIKIP_MNC (a dict of record lists):

    * the country name is replaced when country_match() matches it against a
      COUNTRY_RENAME key;
    * 'codes_alpha_2' is upper-cased and sorted;
    * when there is a single CC2, known to WIKIP_ISO3166, and the country
      name is not found in the canonical ISO3166 names, the country name is
      taken from WIKIP_ISO3166;
    * every CC2 unknown to WIKIP_ISO3166 is reported, as well as records
      without any CC2 whose MCC is not in MCC_INTL.

    Records whose (mcc, mnc) is a key of MNC_ALIAS are then duplicated under
    the aliased (mcc, mnc), and appended to the WIKIP_MNC list selected by
    the first character of the aliased MCC.

    Returns None: the function mutates WIKIP_MNC in place and prints a log.
    """
    print('[+] patch Wikipedia list of MCC-MNC: WIKIP_MNC')
    #
    # canonical variants of all ISO3166 country names
    nameset = set()
    for r in WIKIP_ISO3166.values():
        nameset.update(country_name_canon(r['country_name']))
    #
    # patch country names and ensure country name exists in iso3166 db (or align them)
    # ensure CC2 is upper case and in WIKIP_ISO3166
    # ensure CC2 is available when network is not intl
    renames = sorted(COUNTRY_RENAME.items())
    logs = set()
    for mcc0 in sorted(WIKIP_MNC):
        for r in WIKIP_MNC[mcc0]:
            for oldname, newname in renames:
                if country_match(r['country_name'], oldname):
                    r['country_name'] = newname
                    print('> MCC %s MNC %s, country name changed from %s to %s'\
                          % (r['mcc'], r['mnc'], oldname, newname))
            #
            r['codes_alpha_2'] = sorted(map(str.upper, r['codes_alpha_2']))
            cc2s = r['codes_alpha_2']
            #
            if cc2s:
                # the name can only be aligned on a CC2 which is known:
                # an unknown one used to raise KeyError here, before getting
                # a chance to be reported just below
                if len(cc2s) == 1 and cc2s[0] in WIKIP_ISO3166 \
                and not country_present(r['country_name'], nameset):
                    newname = WIKIP_ISO3166[cc2s[0]]['country_name']
                    # collected in a set: identical lines are printed once
                    logs.add('> MCC %s, all MNC, country name updated from %s to %s, CC2 %s'\
                          % (r['mcc'], r['country_name'], newname, cc2s[0]))
                    r['country_name'] = newname
                for cc2 in cc2s:
                    if cc2 not in WIKIP_ISO3166:
                        print('> MCC %s MNC %s, CC2 %s unknown' %
                              (r['mcc'], r['mnc'], cc2))
            elif r['mcc'] not in MCC_INTL:
                print('> MCC %s MNC %s, no CC2 but not intl network' %
                      (r['mcc'], r['mnc']))
    #
    # aliases are collected first and appended afterwards, so that the lists
    # are not extended while being iterated
    aliases = []
    for mcc0 in sorted(WIKIP_MNC):
        for r in WIKIP_MNC[mcc0]:
            if (r['mcc'], r['mnc']) in MNC_ALIAS:
                # add an alias
                alias = dict(r)
                alias['mcc'], alias['mnc'] = MNC_ALIAS[(r['mcc'], r['mnc'])]
                aliases.append(alias)
                print('> added MNC alias %s.%s -> %s.%s'\
                      % (r['mcc'], r['mnc'], alias['mcc'], alias['mnc']))
    for alias in aliases:
        WIKIP_MNC[alias['mcc'][0:1]].append(alias)
    #
    for log in sorted(logs):
        print(log)
예제 #8
0
def patch_itut_mnc(mncs):
    """Align the country names of an ITU-T MCC-MNC mapping with WIKIP_ISO3166.

    mncs is handled as a dict keyed by country name. Every key which is not a
    WIKIP_ISO3166 country name is passed to _patch_country_name(); when that
    returns a non-empty name, the entry is moved under that name.

    Returns None: mncs is modified in place.
    """
    print('[+] patch ITU-T list of MCC-MNC: %r' % id(mncs))
    #
    isonameset = {r['country_name'] for r in WIKIP_ISO3166.values()}
    #
    # iterate over a snapshot, as entries get re-keyed within the loop
    for cntr, mnos in list(mncs.items()):
        if cntr not in isonameset:
            newname = _patch_country_name(cntr)
            if newname:
                del mncs[cntr]
                mncs[newname] = mnos
예제 #9
0
def _patch_country_name(name):
    """Return the replacement for the country name `name`, or '' if none.

    The replacement is COUNTRY_RENAME[name] when name is listed there.
    Otherwise, it is the 'country_name' of the first WIKIP_ISO3166 record
    for which country_match_set() accepts one of the variants returned by
    country_name_canon(name) against the record's 'nameset'.

    The outcome is printed in both cases.
    """
    if name in COUNTRY_RENAME:
        newname = COUNTRY_RENAME[name]
    else:
        variants = country_name_canon(name)
        for cinf in WIKIP_ISO3166.values():
            if any(country_match_set(variant, cinf['nameset'])
                   for variant in variants):
                newname = cinf['country_name']
                break
        else:
            # no record matched any of the variants
            print('>>> country name %s not found' % name)
            return ''
    #
    print('> country name changed from %s to %s' % (name, newname))
    return newname
예제 #10
0
def patch_wikip_borders():
    """Normalize the Wikipedia territory borders list WIKIP_BORDERS.

    Each record of the module-level WIKIP_BORDERS list is used through its
    'country_name', 'country_url', 'neigh' (neighbour names) and
    'country_sub' ((name, url) pairs) items. The function:

    1) applies COUNTRY_RENAME to country, neighbour and sub-territory names
       (taking the URL from COUNTRY_SPEC when one is set there), and removes
       sub-territories matching the name of their own country;
    2) deletes the records listed in BORD_COUNTRY_DEL, the records listed in
       BORD_DUP_DEL which have sub-territories, and the neighbours listed in
       BORD_COUNTRY_DEL;
    4) for each name of BORD_DUP_DEL, removes it from the neighbours of
       the countries which it does not list itself as neighbours;
    5) reports countries and neighbours unknown to WIKIP_ISO3166, and the
       COUNTRY_SPEC 'bord' entries missing from the neighbours.

    Returns None: the function mutates WIKIP_BORDERS in place and prints a
    log. Raises AssertionError when two remaining records share a name.
    """
    print('[+] patch Wikipedia territory borders list: WIKIP_BORDERS')
    #
    renames = sorted(COUNTRY_RENAME.items())
    #
    def _neigh_name(entry):
        # neighbours are handled as plain names everywhere else in this
        # function: indexing such a name with [0] yields its 1st character,
        # not the name. (name, ...) sequences are still accepted.
        return entry if isinstance(entry, str) else entry[0]
    #
    # 1) patch names
    for r in WIKIP_BORDERS:
        for oldname, newname in renames:
            if country_match(r['country_name'], oldname):
                r['country_name'] = newname
                if newname in COUNTRY_SPEC and 'url' in COUNTRY_SPEC[newname]:
                    r['country_url'] = COUNTRY_SPEC[newname]['url']
                print('> country name changed from %s to %s' % (oldname, newname))
        for n in r['neigh'][:]:
            for oldname, newname in renames:
                if country_match(n, oldname):
                    r['neigh'].remove(n)
                    r['neigh'].append(newname)
                    r['neigh'].sort()
                    print('> border changed from %s to %s, country %s'\
                          % (oldname, newname, r['country_name']))
                    # n is gone from the list: going on with it could only
                    # make remove() fail on a further match
                    break
        for s in r['country_sub'][:]:
            if country_match(s[0], r['country_name']):
                # simply remove territory
                r['country_sub'].remove(s)
                # nothing left to rename, s is gone from the list
                continue
            for oldname, newname in renames:
                if country_match(s[0], oldname):
                    if newname in COUNTRY_SPEC and 'url' in COUNTRY_SPEC[newname]:
                        # keep the (name, url) layout: store the URL, and not
                        # the whole COUNTRY_SPEC entry
                        new_s = (newname, COUNTRY_SPEC[newname]['url'])
                    else:
                        new_s = (newname, s[1])
                    r['country_sub'].remove(s)
                    r['country_sub'].append( new_s )
                    r['country_sub'].sort(key=lambda t: t[0])
                    print('> sub changed from %s to %s, country %s'\
                          % (oldname, newname, r['country_name']))
                    # same as for the neighbours: s is gone from the list
                    break
    #
    # 2) delete entries
    for r in WIKIP_BORDERS[:]:
        if r['country_name'] in BORD_COUNTRY_DEL:
            WIKIP_BORDERS.remove(r)
            print('> country %s deleted' % r['country_name'])
        elif r['country_name'] in BORD_DUP_DEL and r['country_sub']:
            WIKIP_BORDERS.remove(r)
            print('> duplicated country %s deleted' % r['country_name'])
            # ensure all sub-territories are referenced within COUNTRY_SPEC
            for name, url in r['country_sub']:
                if not country_present(name, COUNTRY_SPEC[r['country_name']]['sub']):
                    print('>>> country %s, sub-territory %s not present in COUNTRY_SPEC'\
                          % (r['country_name'], name))
        else:
            for n in r['neigh'][:]:
                n_name = _neigh_name(n)
                if country_present(n_name, BORD_COUNTRY_DEL):
                    r['neigh'].remove(n)
                    print('> country %s, border %s deleted' % (r['country_name'], n_name))
    #
    # 4) remove borders to FR, NL, UK when actually against an oversea territory
    # to enable conversion of WIKIP_BORDERS into a dict
    BD = {r['country_name']: r for r in WIKIP_BORDERS}
    assert( len(BD) == len(WIKIP_BORDERS) )
    for name in BORD_DUP_DEL:
        for r in WIKIP_BORDERS:
            for r_neigh in r['neigh'][:]:
                if country_match(r_neigh, name) \
                and not country_present(r['country_name'], BD[name]['neigh']):
                    # delete entry
                    r['neigh'].remove(r_neigh)
                    print('> country %s, neighbour %s deleted' % (r['country_name'], r_neigh))
    #
    # 5) ensure all country / borders are in the ISO3166 dict
    isonames = set()
    for iso in WIKIP_ISO3166.values():
        isonames.update(country_name_canon(iso['country_name']))
    for r in WIKIP_BORDERS:
        #
        if not any(name in isonames for name in country_name_canon(r['country_name'])):
            if country_present(r['country_name'], SUBTERR_TO_COUNTRY):
                # warning: SUBTERR_TO_COUNTRY lookup could fail here
                print('> not present in ISO3166 dict but referenced as sub of %s, country %s'\
                      % (SUBTERR_TO_COUNTRY[r['country_name']], r['country_name']))
            else:
                print('>>> not present in ISO3166 dict, country %s' % r['country_name'])
        #
        if r['country_name'] in COUNTRY_SPEC and 'bord' in COUNTRY_SPEC[r['country_name']]:
            # ensure those specific borders are correctly referenced
            neigh_names = set(_neigh_name(n) for n in r['neigh'])
            for b in COUNTRY_SPEC[r['country_name']]['bord']:
                if b not in neigh_names:
                    print('>>> missing border %s, country %s' % (b, r['country_name']))
        #
        for n in r['neigh']:
            if not any(name in isonames for name in country_name_canon(n)):
                if country_present(n, SUBTERR_TO_COUNTRY):
                    # warning: SUBTERR_TO_COUNTRY lookup could fail here
                    print('> border %s not present in ISO3166 dict but referenced as sub of %s, country %s'\
                          % (n, SUBTERR_TO_COUNTRY[n], r['country_name']))
                else:
                    print('>>> border %s not present in ISO3166 dict, country %s'\
                          % (n, r['country_name']))
예제 #11
0
def patch_wikip_iso3166():
    """Complete and normalize the Wikipedia ISO3166 dict WIKIP_ISO3166.

    The module-level WIKIP_ISO3166 dict (CC2 -> record) is modified in place:

    1) records are added for the COUNTRY_SPEC entries with an unknown 'cc2'
       (built from the REC_ISO3166 template), for the CC2_ALIAS aliases, and
       for the CC2 found in WIKIP_MSISDN only;
    2) country names are replaced according to COUNTRY_RENAME;
    3) the TLD fields are lower-cased, the alpha-2 / alpha-3 codes are
       upper-cased;
    4) records sharing a canonical name variant are reported;
    5) the names of the COUNTRY_SPEC_CC2 countries are enforced, and the
       'sovereignity' of their 'sub_cc2' territories is set or checked;
    6) the canonical variants of the country name (and of the state name,
       when set) are stored under the 'nameset' item of each record.

    Returns None; a log is printed. Raises AssertionError when a renamed
    country listed in COUNTRY_SPEC_CC2 has another CC2 in COUNTRY_SPEC.
    """
    print('[+] patch Wikipedia ISO3166 dict: WIKIP_ISO3166')
    #
    # 1) add entries
    for country, spec in sorted(COUNTRY_SPEC.items()):
        if 'cc2' not in spec or spec['cc2'] in WIKIP_ISO3166:
            continue
        # start from the template, take over the fields set in COUNTRY_SPEC
        rec = dict(REC_ISO3166)
        rec.update((k, spec[k]) for k in REC_ISO3166 if k in spec)
        rec['country_name'] = country
        rec['code_alpha_2'] = spec['cc2']
        if 'url' in spec:
            rec['country_url'] = spec['url']
        WIKIP_ISO3166[rec['code_alpha_2']] = rec
        print('> CC2 %s, %s added' % (rec['code_alpha_2'], rec['country_name']))
    #
    # an alias refers to the very same record as the CC2 it stands for
    for alias, target in sorted(CC2_ALIAS.items()):
        if alias not in WIKIP_ISO3166:
            WIKIP_ISO3166[alias] = WIKIP_ISO3166[target]
            print('> CC2 %s, alias to %s' % (alias, target))
    #
    # 1bis) add more entries, extracted from the international telephone numbering listing
    for pref in sorted(WIKIP_MSISDN):
        for cc2, name, url in sorted(WIKIP_MSISDN[pref]):
            if cc2 in WIKIP_ISO3166:
                continue
            rec = dict(REC_ISO3166)
            rec['code_alpha_2'] = cc2
            rec['country_name'] = name
            rec['country_url'] = url
            WIKIP_ISO3166[cc2] = rec
            print('> CC2 %s, %s added from WIKIP_MSISDN' % (cc2, name))
    #
    # 2) patch country names
    renames = sorted(COUNTRY_RENAME.items())
    for cc2 in sorted(WIKIP_ISO3166):
        rec = WIKIP_ISO3166[cc2]
        for oldname, newname in renames:
            if not country_match(rec['country_name'], oldname):
                continue
            rec['country_name'] = newname
            if newname in COUNTRY_SPEC_CC2:
                assert( COUNTRY_SPEC[newname]['cc2'] == cc2 )
                if 'url' in COUNTRY_SPEC[newname]:
                    rec['country_url'] = COUNTRY_SPEC[newname]['url']
            print('> country name changed from %s to %s, CC2 %s' % (oldname, newname, cc2))
    #
    # 3) ensure all tld are lower case, and CC codes are upper case
    for rec in WIKIP_ISO3166.values():
        for field in ('cc_tld', 'cc_tld_url'):
            rec[field] = rec[field].lower()
        for field in ('code_alpha_2', 'code_alpha_3'):
            rec[field] = rec[field].upper()
    #
    # 4) ensure all canon names do not collide
    cc2_sorted = sorted(WIKIP_ISO3166)
    canon = [country_name_canon(WIKIP_ISO3166[cc2]['country_name'])
             for cc2 in cc2_sorted]
    # each record is compared against all the following ones
    for i, nameset in enumerate(canon[:-1]):
        for name in nameset:
            for j in range(i + 1, len(canon)):
                if name in canon[j]:
                    print('>>> country name collision %s / %s'\
                          % (cc2_sorted[i], cc2_sorted[j]))
    #
    # 5) ensure all overseas, sub-territories and other geographic specificities
    # are referenced correctly, and verify sovereignity
    for country in COUNTRY_SPEC_CC2:
        spec = COUNTRY_SPEC[country]
        cc2 = spec['cc2']
        wc = WIKIP_ISO3166[cc2]
        if country != wc['country_name']:
            print('> country name changed from %s to %s, CC2 %s' % (wc['country_name'], country, cc2))
            wc['country_name'] = country
            if 'url' in spec:
                wc['country_url'] = spec['url']
        if 'sub_cc2' not in spec:
            continue
        for cc2_s in spec['sub_cc2']:
            if cc2_s not in WIKIP_ISO3166:
                print('>>> missing CC2 %s, part of %s' % (cc2_s, country))
                continue
            wc_s = WIKIP_ISO3166[cc2_s]
            if wc_s['sovereignity'] == '':
                # not set yet: the sub-territory belongs to this country
                wc_s['sovereignity'] = cc2
            elif wc_s['sovereignity'] != cc2:
                print('>>> CC2 %s, %s, sovereignity mismatch %s / %s'\
                      % (cc2_s, wc_s['country_name'], wc_s['sovereignity'], cc2))
    #
    # 6) keep track of country name variants
    for cc2 in sorted(WIKIP_ISO3166):
        rec = WIKIP_ISO3166[cc2]
        variants = country_name_canon(rec['country_name'])
        rec['nameset'] = variants
        if rec['state_name']:
            variants.update( country_name_canon(rec['state_name']) )