import json

from countryinfo import CountryInfo


def capitales_etudiee_oecd():
    '''Retrieve the capitals of the OECD countries through the CountryInfo() library.

    This is useful when extracting information from OpenWeatherMap.

    ==> The capitals are returned in a dictionary as follows:
        {
            ...
            'SRI LANKA': 'Colombo',
            'SUDAN': 'Khartoum',
            etc ...
        }
    '''
    # OpenWeatherMap city list (loaded for reference; not used further in this function).
    with open('app/static/json/city.list.json') as f:
        data = json.load(f)

    capitales_oecd = {}
    nom_pays_ocde_majuscules = []
    nom_pays_open_weather_majuscules = []
    pays_communs_pour_projet = []

    # `nom_des_pays` is the module-level list of OECD country names.
    for pays in nom_des_pays:
        nom_pays_ocde_majuscules.append(pays.upper())

    infos_pays = CountryInfo()
    infos_pays = dict(infos_pays.all())
    for pays in infos_pays.keys():
        nom_pays_open_weather_majuscules.append(pays.upper())

    ### Build sets in order to find the countries common to both lists
    pays_ocde = set(nom_pays_ocde_majuscules)
    pays_open_weather = set(nom_pays_open_weather_majuscules)
    pays_communs_pour_projet = pays_ocde.intersection(pays_open_weather)

    ### Convert the set back to a sorted list
    pays_communs_pour_projet = sorted(pays_communs_pour_projet)

    for pays in pays_communs_pour_projet:
        country = CountryInfo(pays)
        # capitale = country.capital().upper()
        capitale = country.capital()
        capitales_oecd[pays] = capitale

    return capitales_oecd
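
# Minimal usage sketch (illustration only, not part of the original module): assuming
# `nom_des_pays` is populated, the dictionary returned above can be queried by
# upper-cased country name before building an OpenWeatherMap request.
def _exemple_capitales_oecd():
    capitales = capitales_etudiee_oecd()
    # e.g. capitales['SRI LANKA'] == 'Colombo', capitales['SUDAN'] == 'Khartoum'
    for pays, capitale in sorted(capitales.items()):
        print('{}: {}'.format(pays, capitale))
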
import pickle
from collections import Counter

import fasttext
import pycountry
import torch
import torch.nn as nn
import torch.nn.functional as F
from countryinfo import CountryInfo
from torchvision import models, transforms
from tqdm import tqdm


def geo_lng(dataloader, args):
    mappings = pickle.load(open('util_files/country_lang_mappings.pkl', 'rb'))
    iso3_to_lang = mappings['iso3_to_lang']
    # Country-to-iso3 mappings that are missing from pycountry's fuzzy search
    missing = {'South+Korea': 'KOR',
               'North+Korea': 'PRK',
               'Laos': 'LAO',
               'Caribbean+Netherlands': 'BES',
               'St.+Lucia': 'LCA',
               'East+Timor': 'TLS',
               'Democratic+Republic+of+Congo': 'COD',
               'Swaziland': 'SWZ',
               'Cape+Verde': 'CPV',
               'C%C3%B4te+d%C2%B4Ivoire': 'CIV',
               'Ivory+Coast': 'CIV',
               'Channel+Islands': 'GBR'
               }

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # AlexNet with its last classifier layer removed, used as a feature extractor
    model = models.alexnet(pretrained=True).to(device)
    new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
    model.classifier = new_classifier

    with_country = dataloader.dataset.with_country

    country_with_langs = {}
    country_with_imgs = {}  # for each country, first list is tourist, second is local
    lang_counts = {}

    detecter = fasttext.load_model('util_files/lid.176.bin')
    lang_dict = {}
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    for i, (data, target) in enumerate(tqdm(dataloader)):
        if data is None:
            continue
        this_tags = [tag['label'] for tag in target[0] if len(tag['label']) >= 3]
        if len(this_tags) > 0:
            srcz = []
            conf = []
            for tag in this_tags:
                classify = detecter.predict(tag)
                srcz.append(classify[0][0][9:])  # strip the '__label__' prefix
                conf.append(classify[1][0])

            # Pick out the most common language
            commons = Counter(srcz).most_common()
            the_src = commons[0][0]

            # If the most common language is English, look at the second most common language,
            # since people oftentimes use English even when it's not their native language
            if the_src == 'en' and len(commons) > 1:
                the_src_maybe = commons[1][0]
                words = [j for j in range(len(srcz)) if srcz[j] == the_src_maybe]

                # If this second most common language has been classified with more than .5
                # probability, then choose this language for the image
                for word in words:
                    if conf[word] > .5:
                        the_src = the_src_maybe

            if the_src in lang_counts.keys():
                lang_counts[the_src] += 1
            else:
                lang_counts[the_src] = 1

            country = target[2][0]
            iso3 = None
            local = None
            try:
                iso3 = pycountry.countries.search_fuzzy(country.replace('+', ' '))[0].alpha_3
            except LookupError:
                iso3 = missing[country]

            try:
                country_info = CountryInfo(country.replace('+', ' ')).info()
            except KeyError:
                country_info = {}
            # Collect alternative spellings of the country's name to compare against the tags
            country_name = country.split('+')
            if 'name' in country_info.keys():
                country_name.append(country_info['name'])
            if 'nativeName' in country_info.keys():
                country_name.append(country_info['nativeName'])

            # When comparing images to distinguish between tourist and local, we further look into
            # the content of the tags, allowing some images to be categorized as 'unknown' if we
            # are not that sure whether it's tourist or local.
            # Local: in a local language, country's name isn't a tag, and 'travel' isn't a tag
            # Tourist: in a non-local language, or 'travel' is a tag
            try:
                if the_src in iso3_to_lang[iso3] and len(set(country_name) & set(this_tags)) == 0 and 'travel' not in this_tags:
                    local = 1
                elif the_src not in iso3_to_lang[iso3] or 'travel' in this_tags:
                    local = 0
            except KeyError:
                print("This iso3 can't be found in iso3_to_lang: {}".format(iso3))

            if country not in country_with_langs.keys():
                country_with_langs[country] = []
                country_with_imgs[country] = [[], []]
            country_with_langs[country].append(the_src)
            if local is not None:
                if len(country_with_imgs[country][local]) < 500:
                    data = normalize(data).to(device)
                    big_data = F.interpolate(data.unsqueeze(0), size=224, mode='bilinear').to(device)
                    this_features = model(big_data)
                    country_with_imgs[country][local].append((this_features.data.cpu().numpy(), target[3]))

    info = {}
    info['lang_counts'] = lang_counts
    info['country_with_langs'] = country_with_langs
    info['country_with_imgs'] = country_with_imgs
    pickle.dump(info, open("results/{}/geo_lng.pkl".format(args.folder), "wb"))
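
# Minimal sketch (illustration only, not from the original repository): reading back the
# pickle written by geo_lng() above and printing the per-language tag counts. The path
# assumes the same `results/<folder>/geo_lng.pkl` layout used in the dump call.
def _inspect_geo_lng(folder):
    import pickle
    with open("results/{}/geo_lng.pkl".format(folder), "rb") as f:
        info = pickle.load(f)
    # info['country_with_langs'] maps country -> list of detected languages
    # info['country_with_imgs'] maps country -> [tourist features, local features]
    for lang, count in sorted(info['lang_counts'].items(), key=lambda kv: -kv[1]):
        print("{}: {}".format(lang, count))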