def spreading(self):
    """Main simulation loop: randomly infect countries until none remain.

    Roughly 1 in 30 iterations performs a "global" jump (infecting a
    brand-new healthy country, drawn with a plane animation); every other
    iteration performs "local" spread among already-susceptible countries.
    The loop ends when both pools are empty (everything is infected) or
    when ``self.run`` is cleared externally.
    """
    while self.run:
        if random.randint(1, 30) == 1:
            # Global spread: jump to a country that was still healthy.
            if self.healthy:  # idiom: truthiness instead of len(...) != 0
                newInfected = random.choice(self.healthy)
                self.infect.plotAvion(newInfected)
                if newInfected.name not in self.succeptible:
                    self.succeptible.append(newInfected.name)
                plt.title(
                    coco.convert(names=newInfected.name, to="short_name")
                    + " IS NOW INFECTED",
                    fontsize=50)
                self.update(newInfected)
        else:
            # Local spread: re-target a country that is already susceptible.
            if self.succeptible:
                newInfected = random.choice(self.succeptible)
                plt.title(
                    coco.convert(names=newInfected, to="short_name")
                    + " IS NOW INFECTED",
                    fontsize=50)
                self.update(self.finde(newInfected))
        if not self.succeptible and not self.healthy:
            self.infect.end()
            # Fixed grammar of the end-of-simulation message.
            print("Every country is infected")
            break
def get_sentiment_per_country_per_day(self):
    """Return daily average tweet sentiment and its standard deviation per country.

    Queries sentiment aggregates for US/PL/GB/IT (two specific dates are
    excluded -- presumably known-bad data days, TODO confirm) and returns a
    dict keyed by ISO3 code with parallel lists 'dates', 'sentiment',
    'stdev' plus a display 'name'.
    """
    select_sql = (
        "SELECT u.country_code, avg(t.sentiment_pol) AS sentiment, "
        "AVG(t.sentiment_pol*t.sentiment_pol) - AVG(t.sentiment_pol)*AVG(t.sentiment_pol) AS variance, "
        "DATE(t.created_at) AS created_at from tweet t "
        "JOIN user u on u.id = t.user_id "
        "WHERE u.country_code IN ('us','pl', 'gb', 'it') AND t.created_at > '2020-03-07' "
        "AND t.created_at <> '2020-03-17' AND t.created_at <> '2020-04-29' "
        "GROUP BY u.country_code, DATE(t.created_at);")
    self.cur.execute(select_sql)
    data = self.cur.fetchall()
    result = dict()
    for row in data:
        cc = coco.convert(names=row['country_code'], to='ISO3')
        if cc not in result:
            result[cc] = {
                'dates': [],
                'sentiment': [],
                'stdev': [],
                'name': coco.convert(cc, to='name_short'),
            }
        result[cc]['dates'].append(row['created_at'])
        sentiment = row['sentiment'] if row['sentiment'] is not None else 0
        # E[X^2] - E[X]^2 can come out as a tiny negative number due to
        # floating-point error; clamp at 0 so sqrt cannot raise ValueError.
        variance = row['variance'] if row['variance'] is not None else 0
        result[cc]['sentiment'].append(sentiment)
        result[cc]['stdev'].append(sqrt(max(variance, 0)))
    return result
def get_tweets_per_country(self, from_date=None, to_date=None, divided_by_population=False):
    """Count tweets per country within a date range.

    Returns a dict of parallel lists: 'country_code' (ISO3),
    'country_name' (short name) and 'number'. When
    ``divided_by_population`` is True, counts become per-capita rates and
    fall back to 0 for countries with unknown or zero population.
    """
    from_date, to_date = self.parse_from_to_date(from_date, to_date)
    # Parameterized query instead of string concatenation (safe quoting /
    # SQL injection). NOTE(review): '%s' assumes a 'format' paramstyle
    # driver (pymysql/psycopg, consistent with dict-style rows here) --
    # confirm the DB driver.
    select_sql = (
        "SELECT u.country_code, count(*) AS number FROM tweet t "
        "JOIN user u on u.id = t.user_id "
        "WHERE u.country_code IS NOT NULL AND u.country_code <> 'und' "
        "AND t.created_at >= %s AND t.created_at <= %s "
        "GROUP BY u.country_code")
    self.cur.execute(select_sql, (from_date, to_date))
    data = self.cur.fetchall()
    result = {'country_code': [], 'country_name': [], 'number': []}
    for row in data:
        cc = coco.convert(names=row['country_code'], to='ISO3')
        result['country_code'].append(cc)
        result['country_name'].append(coco.convert(cc, to='name_short'))
        result['number'].append(row['number'])
    if divided_by_population:
        population = self.get_countries_populations(result['country_name'], all=True)
        for i, name in enumerate(result['country_name']):
            pop = population.get(name)
            # Treat missing, None *and zero* population as "unknown" --
            # the original would have divided by zero on pop == 0.
            result['number'][i] = result['number'][i] / pop if pop else 0
    return result
def ratio(df):
    """Aggregate cases/deaths per country and add per-capita ratio columns.

    Collapses the date dimension, joins each country's population (from
    data/population.xlsx) and computes 'case_ratio' and 'death_ratio'.
    Flag images are copied for Tableau on a best-effort basis -- a failure
    there is reported but never raised.
    """
    # Group the data by country (date is not considered).
    df_ratio = df.reset_index().groupby('country', as_index=False).sum()
    pop = pd.read_excel('data/population.xlsx')  # country population data
    pop.Country = coco.convert(names=pop.Country.to_list(), to='short_name')
    pop.columns = ['country', 'population']
    country_list = coco.convert(names=df_ratio.country.to_list(),
                                to='short_name', not_found=None)
    df_ratio['country'] = country_list
    df_ratio = pd.merge(df_ratio, pop, how='left', on='country')
    df_ratio['case_ratio'] = df_ratio.new_case / df_ratio.population
    df_ratio['death_ratio'] = df_ratio.new_death / df_ratio.population
    try:
        copy_flag('flags_ratio', country_list, ship=False)
    except Exception as exc:  # narrowed from bare except; keep best-effort
        print("Copying flags to Tableau folder unsuccessful:", exc)
    return df_ratio
def copy_flag(dest, country_list, ship=True):
    """Copy flag images for the given countries into the Tableau folder.

    The destination folder (config.dir + dest) is wiped and recreated.
    Each copied file is renamed to the standard short country name; two
    names get special casing because Tableau mishandles mid-word capitals.
    """
    import os
    import shutil
    import config

    src_dir = 'tableau/flags'
    dest = config.dir + dest
    shutil.rmtree(dest, ignore_errors=True)  # start from a clean folder
    os.mkdir(dest)
    # Tableau chokes on the mid-word capitals in these two names.
    special_names = {'DR Congo': 'Dr Congo.png', 'MS Zaandam': 'Ms Zaandam.png'}
    for fname in os.listdir(src_dir):
        stem = fname.split('.')[0]
        if ship:
            country_name = coco.convert(names=stem, to='short_name',
                                        not_found=None)
        else:
            country_name = coco.convert(names=stem, to='short_name')
        # Only ship flags for countries present in the dataset.
        if country_name in country_list:
            shutil.copy(os.path.join(src_dir, fname), dest)
            target = special_names.get(country_name, country_name + '.png')
            os.rename(os.path.join(dest, fname), os.path.join(dest, target))
def country(df):
    """Group raw data by date and country and enrich it for Tableau.

    Normalises country names, adds a 'continent' column and merges in a
    ranking index based on total confirmed cases. Copies flag images on a
    best-effort basis (failure reported, never raised).
    """
    # Group the data by date (daily) and then country.
    df_country = df.reset_index().groupby(['date', 'country'],
                                          as_index=False).sum()
    country_list = coco.convert(names=df_country.country.to_list(),
                                to='short_name', not_found=None)
    df_country['country'] = country_list
    df_country['continent'] = coco.convert(names=df_country.country.to_list(),
                                           to='continent')
    # Ranking column based on total confirmed cases (helpful in Tableau):
    # index 1 = most cases.
    df_country_sort = (df_country.groupby('country', as_index=False)
                       .sum().sort_values('new_case', ascending=False))
    df_country_sort.index = np.arange(1, len(df_country_sort) + 1)
    df_country_sort = df_country_sort.reset_index().loc[:, 'index':'country']
    df_country = pd.merge(df_country, df_country_sort, how='left', on='country')
    try:
        copy_flag('flags_country', country_list)
    except Exception as exc:  # narrowed from bare except
        print("Copying flags to Tableau folder unsuccessful:", exc)
    return df_country
async def flag(self, ctx, *country: str):
    """Send an embed with a country's flag; picks a random country when
    none is given.

    The two original branches duplicated the embed-building code and only
    one of them guarded ctx.send -- both now share one code path.
    """
    if country:
        name = " ".join(country)
        filtered = await country_filter(name, ctx)
        if filtered is None:
            return
        name = filtered["name"]
    else:
        name = random.choice(quiz_country_list)
    iso2 = coco.convert(names=name, to="ISO2")
    embed = discord.Embed(title=f"Flag of {name.title()}")
    embed.set_image(url=f"https://flagcdn.com/w320/{iso2.lower()}.jpg")
    try:
        await ctx.send(embed=embed)
    except discord.HTTPException:  # narrowed from bare except
        error_embed = discord.Embed(title="Error",
                                    description=":x: Country not found")
        await ctx.send(embed=error_embed)
def add_country_code(data, ctry_col):
    """
    Append two new columns to the data containing each entry's country's
    country codes.

    :param data: filepath (.csv or .xlsx extension) or dataframe.
    :type data: str, DataFrame, geopandas.GeoDataFrame.
    :param ctry_col: name of the country column.
    :type ctry_col: str.
    :return: the modified dataframe with the new columns 'ISO2' and 'ISO3'
        for two-letter and three-letter country codes respectively.
    :rtype: DataFrame if the type of `data` is DataFrame or str, or
        geopandas.GeoDataFrame if it is geopandas.GeoDataFrame.

    >>> df = pd.DataFrame({'City': ['Rabat', 'Lyon', 'Cleveland'],
    ...                    'Country': ['Morocco', 'France', 'United States of America']})
    >>> add_country_code(df, ctry_col='Country')
            City                   Country ISO2 ISO3
    0      Rabat                   Morocco   MA  MAR
    1       Lyon                    France   FR  FRA
    2  Cleveland  United States of America   US  USA
    """
    # Fixed doctest: the original example called add_country_code(df=data, ...)
    # with a non-existent keyword and an undefined variable.
    df = read_data(data, {ctry_col})
    # The previous df['ISO2'] = None / df['ISO3'] = None pre-assignments
    # were dead stores -- both columns are fully overwritten here.
    df['ISO2'] = coco.convert(names=list(df[ctry_col]), to='ISO2')
    df['ISO3'] = coco.convert(names=list(df[ctry_col]), to='ISO3')
    return df
async def gni_percap(self, ctx, country: str, year: int):
    """Reply with the GNI per capita of `country` in `year` (World Bank data).

    On any failure (unknown country, missing year, API error) an apology
    embed is sent instead -- the broad except is deliberate here since any
    error maps to the same user-facing reply.
    """
    await ctx.defer(hidden=True)
    year_str = str(year)
    try:
        iso2 = coco.convert(names=country, to="ISO2")
        # Indicator(s) to fetch; extend this dict for more than one.
        indicators = {"NY.GNP.PCAP.CD": "GNI per Capita"}
        value = wbdata.get_dataframe(
            indicators, country=[iso2], convert_date=False
        ).to_dict()["GNI per Capita"][year_str]
        if str(value) == "nan":
            embed = discord.Embed(
                title="Sorry",
                description="**We couldn't find data for that year**",
                color=0xFF5733,
            )
            # NOTE(review): `url` must be defined at module level -- confirm.
            embed.set_thumbnail(url=url)
            await ctx.send(embed=embed)
        else:
            embed = discord.Embed(
                title="GNI per capita of {}".format(country),
                description=f"The gni per capita of {country} in {year_str} was/is $`{str(value)}`",
                color=0xFF5733,
            )
            # Reuse the ISO2 computed above instead of converting again.
            embed.set_thumbnail(
                url=f"https://flagcdn.com/w80/{iso2.lower()}.jpg"
            )
            embed.set_footer(text="Information requested by: {}".format(ctx.author))
            await ctx.send(embed=embed)
    except Exception:
        embed = discord.Embed(
            title="Sorry",
            description="** We could not find data for that year**",
            color=0xFF5733,
        )
        embed.set_thumbnail(url=url)
        await ctx.send(embed=embed)
def prepare_daily_report(mes, dia):
    """Build per-country delayed-flight counts for one (month, day-name) slice.

    Filters data_final.csv to the given day name and month, counts
    delayed+on-time flights per destination country (Chile/'CL' excluded
    as domestic) and joins the plotly country-code table on the short name.
    """
    df_bar = pd.read_csv('data_final.csv')
    df_dia = df_bar[df_bar['dia_nomi'] == dia]
    # .copy() so the new column below does not hit SettingWithCopyWarning
    # (we are otherwise writing into a slice of df_bar).
    df_mes = df_dia[df_dia['mes'] == mes].copy()
    # atraso == 0 (on time) is recoded to 1 so that summing counts every
    # flight: delayed + on-time.
    df_mes['atraso_total'] = df_mes['atraso'].replace([0], [1])
    df_country = df_mes.groupby(['dest_pais']).sum().reset_index()
    df_country = df_country[df_country['dest_pais'] != 'CL']
    code_df = pd.read_csv('code.csv')
    df_country['pais'] = coco.convert(names=df_country.dest_pais.tolist(),
                                      to='name_short', not_found=None)
    df_country_code = df_country.merge(code_df, left_on='pais',
                                       right_on='COUNTRY', how='left')
    return df_country_code
async def states(self, ctx, *, country):
    """Send an embed listing the provinces/states of the given country."""
    data = await country_filter(country, ctx)
    if data is None:
        return
    country = data["name"]
    provinces = CountryInfo(country).provinces()
    # Wrap each province in backticks, join with " |", then ensure every
    # pipe is followed by a space.
    joined = " |".join("`" + name + "`" for name in provinces)
    spaced = re.sub(r"(?<=[|])(?=[^\s])", r" ", joined)
    embed = discord.Embed(
        title="States of " + country,
        description="**{result2}**".format(result2=spaced),
        color=0xFF5733,
    )
    iso2 = coco.convert(names=country, to="ISO2")
    embed.set_thumbnail(url=f"https://flagcdn.com/w80/{iso2.lower()}.jpg")
    embed.set_footer(
        text="Requested by: {name}".format(name=ctx.author),
        icon_url=ctx.author.avatar_url,
    )
    await ctx.send(embed=embed)
def iso3_to_country(iso3):
    """Convert an ISO3 code to the short country name.

    The special value 'Global' is passed through unchanged.
    """
    if iso3 == 'Global':
        return 'Global'
    return coco.convert(names=iso3, to='name_short')
def get_data(cur, conn, start_date, end_date):
    """Fetch daily confirmed-case counts for a spread of top-GDP countries.

    Picks every 10th country (rows 0, 10, ..., 90) from the GDP table,
    queries the OxCGRT stringency API for the date range, and returns one
    list per country of (date, country_name, confirmed) tuples. Dates for
    which the API has no entry for a country are skipped.
    """
    # Removed the pointless f-string: the query has no interpolation.
    cur.execute('SELECT country FROM GDP ORDER BY GDP DESC')
    country_names = cur.fetchall()
    countries = [country_names[row][0] for row in range(0, 99, 10)]
    codes = coco.convert(names=countries, to='ISO3')
    request_url = (f'https://covidtrackerapi.bsg.ox.ac.uk/api/v2/stringency/'
                   f'date-range/{start_date}/{end_date}')
    request = requests.get(request_url)
    data = json.loads(request.text).get('data')
    data_list = []
    # zip replaces the original manual index counter (the loop variable
    # `name` was never used -- codes[i] was looked up instead).
    for code, country in zip(codes, countries):
        date_cases = []
        for date in data:
            all_cases = data[date].get(code)
            if all_cases is None:
                continue
            date_cases.append((date, country, all_cases.get('confirmed')))
        data_list.append(date_cases)
    return data_list
def __init__(self, db, predict_range, country_name, state_name=None):
    """Set up an epidemic+tweets model for one country (optionally one state).

    Loads COVID-19 deaths/confirmed/recovered from the database starting
    2020-03-07, loads per-day tweet counts over the same timeline, and
    initialises SEIR-style parameters plus tweet-coupling parameters for
    later fitting.

    :param db: data-access object providing get_epidemic_data_in,
        get_tweets_per_day_in and get_number_of_users_in.
    :param predict_range: number of days to extrapolate beyond the data.
    :param country_name: country to model (converted to ISO2 for tweets).
    :param state_name: optional sub-national region.
    """
    pandemic_data = db.get_epidemic_data_in(
        [country_name], ['deaths', 'confirmed', 'recovered'],
        "COVID19",
        since_epidemy_start=False,
        state_name=state_name,
        from_date='2020-03-07')
    # Tweet tables are keyed by lowercase ISO2 codes.
    country_code = coco.convert(country_name, to='ISO2').lower()
    self.timeline = pandemic_data['dates']
    self.extended_timeline = self.timeline
    self.predict_range = predict_range
    self.tweets, self.positive_tweets, self.negative_tweets = db.get_tweets_per_day_in(
        country_code, state_name, self.timeline[0], self.timeline[-1])
    self.users_in_country = db.get_number_of_users_in(
        country_code, state_name)
    self.days = len(self.tweets)
    self.confirmed = pandemic_data[country_name]['confirmed']
    self.recovered = pandemic_data[country_name]['recovered']
    self.deaths = pandemic_data[country_name]['deaths']
    # Active cases = confirmed - recovered - deaths, per day.
    self.active = [
        pandemic_data[country_name]['confirmed'][i] - self.recovered[i] -
        self.deaths[i] for i in range(len(self.deaths))
    ]
    self.N = pandemic_data[country_name]['population']
    # SEIR rate parameters, all initialised to 1 before fitting.
    self.beta = 1
    self.gamma = 1
    self.delta = 1
    # Tweet-coupling parameters (per-user influence, outdating, shares).
    self.t_infl = 1 / self.users_in_country
    self.t_outdt = 5  # tweet outdating rate
    self.t_s = self.users_in_country / self.N
    self.t_e = self.users_in_country / self.N
    self.t_i = self.users_in_country / self.N
    self.tweets_line = self.create_tweets_line()
    self.I0 = 1
    # NOTE(review): R0 here is beta/gamma (reproduction-number style) but
    # is then subtracted from N like a removed-compartment count -- with
    # the initial beta=gamma=1 this makes S0 = N - 2; confirm intent.
    self.R0 = self.beta / self.gamma
    self.S0 = self.N - self.I0 - self.R0
    self.E0 = self.N - (self.S0 + self.I0 + self.R0)
    self.Y0 = self.S0, self.E0, self.I0, self.R0
    self.T0 = self.tweets[0]
    # (name, value) pairs for the fitter; frozen_params are not optimised.
    self.params = [("beta", self.beta), ("gamma", self.gamma),
                   ("delta", self.delta), ("i0", self.I0), ("t0", self.T0),
                   ("t_infl", self.t_infl), ("t_outdt", self.t_outdt),
                   ("t_s", self.t_s), ("t_e", self.t_e), ("t_i", self.t_i)]
    self.frozen_params = ["beta", "gamma", "delta", "i0", "t0", "t_outdt"]
    self.fit_data = self.active
    self.fit_fun = self.fit_I
def _read_pop(self, populationdata):
    """Load per-country population from a CSV into self.country_pop and an
    array aligned with self.nodes indices.

    Countries without population data fall back to 1 (keeps later
    divisions safe) and are reported on stdout.
    """
    # Read the population data file. (Translated from the original
    # Chinese comment.)
    df = pd.read_csv(populationdata)
    country_properties1 = {}
    for index, row in df.iterrows():
        try:
            ccc = coco.convert(names=row['Name'], to="name_short")
            country_properties1[ccc] = float(row['Population'])
        except Exception:  # narrowed from bare except; report the bad row
            print(row['Name'])
            print(row['Population'])
    self.country_pop = {}
    print('second')
    for country, i in self.nodes.items():
        if self.country_pop.get(country, -1) > 0:
            print('duplicate:', country)
        else:
            # Default population 1 for unknown countries.
            self.country_pop[country] = country_properties1.get(country, 1)
    print('These countries have no pop data:')
    for k, v in self.country_pop.items():
        if v <= 0:
            print(k)
    # Dense population vector indexed by node id.
    self.population = np.ones(len(self.nodes))
    for cc, i in self.nodes.items():
        self.population[i] = self.country_pop.get(cc, 1.0)
def is_country_present(dataframe, country):
    """
    Checks if user input country is a valid country in the dataset.

    Converts the name to an ISO3 code first; returns True when that code
    appears in the dataframe's 'iso3' column.
    """
    iso3 = coco.convert(names=country, to='ISO3')
    # process some error if coco can't convert to a country
    # BUGFIX: `iso3 in dataframe['iso3']` tests membership against the
    # Series *index labels*, not the column values -- use .values so the
    # membership test runs over the actual data.
    in_dataset = iso3 in dataframe['iso3'].values
    return in_dataset
def country(code):
    """Resolve a phone country-calling code to its country name (JSON reply)."""
    region = phonenumbers.region_code_for_country_code(code)
    print(region)
    if region == "ZZ":  # ZZ = unknown/invalid calling code
        return jsonify({"message": "Resource not found"}), 404
    name = country_converter.convert(region, to="short_name")
    return jsonify({"country": name})
def get_emoji_country(territory):
    """Return the emoji flag for a territory, or '' when it has none."""
    try:
        iso2 = convert(names=[territory], to="ISO2")
        return flag.flag(iso2)
    except ValueError:
        # Unconvertible territory / no flag available.
        return ""
def get_ip_info(dom):
    """Resolve a domain's registration country and age flag via RDAP.

    Returns (country_name, dom_age_gt_1year) where the flag is 1 when the
    ASN registration is older than one year, else 0. Any lookup/parse
    failure falls back to ('Unknown', 0) -- deliberately best-effort.
    """
    try:
        ip_address = socket.gethostbyname(dom)
        who_is = IPWhois(ip_address).lookup_rdap()
        country_code = who_is['asn_country_code']
        registration_date = who_is['asn_date']
        # asn_date is ISO formatted (YYYY-MM-DD); fromisoformat replaces
        # the manual split/map/int parsing.
        rdate = date.fromisoformat(registration_date)
        difference = date.today() - rdate
        dom_age_gt_1year = 1 if difference > datetime.timedelta(days=365) else 0
        if country_code == ' ':
            country_name = 'Unknown'
        else:
            # Convert the 2-char country code into a short name, e.g.
            # US -> United States.
            country_name = coco.convert(country_code, to='name_short')
            if 'Not' in country_name:
                country_name = 'Unknown'
            else:
                country_name = country_name.replace(" ", "_")
        # Return on both branches -- in the original the blank-country-code
        # path could fall off the end and implicitly return None.
        return country_name, dom_age_gt_1year
    except Exception:
        return 'Unknown', 0
def list_to_iso3(OBOR_country_list: list):
    """Convert a list of country names to ISO3 codes.

    Returns the list of codes, or None when conversion did not yield one
    code per input name.
    """
    codes = country_converter.convert(names=OBOR_country_list, to='ISO3')
    # For a single-element input, country_converter returns a bare string,
    # which made the original length check compare len(str) to len(list);
    # normalise to a list so the check (and callers) behave.
    if isinstance(codes, str):
        codes = [codes]
    if len(codes) == len(OBOR_country_list):
        return codes
    return None
def get_cnt(cnts):
    """extract ISO from country field, replace unknown ISOs, make sure no duplicating bad names"""
    # Plan/appeal titles that coco cannot resolve, mapped to a country.
    repl = {
        '2014 Revised Strategic Response Plan Sudan': 'Sudan',
        '2016 Humanitarian Response Plan': 'Iraq',
        '2017 Humanitarian Needs Overview Congo': 'Congo',
        'Emergency Humanitarian Response Plan REVISION 2008': 'Kenya',
        'Philippine: Typhoon Haiyan (Yolanda) Strategic Response Plan 2014': 'Philippines',
        'Strategic Response Plan 2014 Occupied Palestian Territory': 'Palestine',
        'Sudanese Red Crescent Society Emergency appeal 2014': 'Sudan',
        'REGIONAL REFUGEE AND MIGRANT RESPONSE PLAN FOR EUROPE.pdf': 'Turkey',
        'REGIONAL REFUGEE AND MIGRANT RESPONSE PLAN FOR EUROPE': 'Turkey',
        'Regional Refugee & Resilience Plan 2015-2016': 'Syria',
        'JRP For Rohingya Humanitarian Crisis.pdf': 'Bangladesh',
        'Regional Refugee and Resilience Plan 2016-2017': 'Syria',
        'Regional Refugee and Resilience Plan 2017-2018': 'Syria'
    }
    cnts = [repl.get(v, v) for v in cnts]
    # Guard: no replacement target may itself occur more than once as a
    # (still-unresolved) key, and nothing may remain unmatched.
    replaced = [v for v in cnts if v in repl]
    assert len(replaced) == len(set(replaced))
    assert 'not found' not in cnts
    return coco.convert(names=cnts, to='ISO3')
def ensure_country_name(self, country):
    """
    Ensure that the country name is correct. If not, the correct country
    name will be found.

    Args:
        country (str): country name

    Returns:
        str: country name

    Raises:
        SubsetNotFoundError: the name (even after normalisation) is not
            present in the cleaned dataset.
    """
    df = self._ensure_dataframe(
        self._cleaned_df, name="the cleaned dataset", columns=[self.COUNTRY])
    selectable_set = set(df[self.COUNTRY].unique())
    # Fast path: the name is already exactly as registered.
    if country in selectable_set:
        return country
    # Normalise via country_converter, then apply a few extra aliases
    # that coco's short names do not cover.
    converted = coco.convert(country, to="name_short", not_found=None)
    abbr_dict = {
        "Congo Republic": "Republic of the Congo",
        "DR Congo": "Democratic Republic of the Congo",
        "UK": "United Kingdom",
        "Vatican": "Holy See",
    }
    name = abbr_dict.get(converted, converted)
    if name in selectable_set:
        return name
    raise SubsetNotFoundError(country=country, country_alias=name)
def parse_hopkins(file_name):
    """Parse a Johns Hopkins JSON snapshot into a per-country dataframe and
    write it back out as parsed JSON.

    The output filename and the 'dt' column are sliced out of `file_name`
    at fixed positions -- assumes the scraper's exact path layout
    (./Scraper/Data/..._<datetime>.json); TODO confirm and derive robustly.
    """
    with open(file_name) as f:
        hopkins_data = json.load(f)
    # 'data' maps stringified indices "0", "1", ... to country records.
    records = hopkins_data['data']
    rows = [records[str(i)] for i in range(len(records))]
    # One comprehension per column replaces the four parallel append loops.
    compiled_data = pd.DataFrame({
        "location_code": coco.convert(names=[r['location'] for r in rows],
                                      to='ISO3'),
        "confirmed_cases": [r['confirmed'] for r in rows],
        "deaths": [r['deaths'] for r in rows],
        "recovered": [r['recovered'] for r in rows],
        # Substring includes the date and time of the file.
        "dt": file_name[37:-5],
    })
    print(compiled_data)
    compiled_data.to_json('./Scraper/Data/parsed_data_' + file_name[29:])
def run(self, dispatcher, tracker, domain):
    """Rasa action: fetch and utter top news headlines for the tracked country."""
    from newsapi import NewsApiClient

    # SECURITY NOTE(review): API key is hard-coded -- move to config/env.
    newsapi = NewsApiClient(api_key='f908755783e34e738776e64eeacfbd17')
    country = tracker.get_slot('country')
    print(country)
    country_iso2 = coco.convert(names=country, to='ISO2', not_found=None)
    print(country_iso2)
    # BUGFIX: with not_found=None an unknown country made .lower() raise
    # AttributeError; bail out gracefully instead.
    if country_iso2 is None:
        dispatcher.utter_message("Sorry, I could not recognise that country.")
        return
    # /v2/top-headlines for the country's ISO2 code.
    top_headlines = newsapi.get_top_headlines(country=country_iso2.lower())
    # dumps/loads round-trip normalises the response to plain JSON types
    # (kept from the original).
    news_json = json.dumps(top_headlines)
    list_articles = json.loads(news_json)
    for articles in list_articles['articles']:
        print('Author: ' + str(articles['author']))
        print('Source: ' + str(articles['source']['name']))
        print('Title: ' + str(articles['title']))
        print('URL: ' + str(articles['url']) + "\n")
        print("############################################################")
        dispatcher.utter_message('**Source:** ' + str(articles['source']['name']) +
                                 '\n **Title:** ' + str(articles['title']) +
                                 '\n **URL:** ' + str(articles['url']) + '\n')
def bw_get_activity_info_manually(cls, input_act_str, db_name, input_act_amount):
    """Parse a Brightway activity string into an activity-values dict.

    Expects `input_act_str` shaped like ``'name' (unit, location, ...)``:
    the name between the first pair of apostrophes and a parenthesised
    unit/location tail. Returns a dict with name, unit, location (ISO2
    where resolvable), amount, database, type and a placeholder comment.

    NOTE(review): with exactly one apostrophe the name is taken as
    everything before it -- presumably a trailing-quote-only format;
    confirm against real inputs.
    """
    # Extract the activity name
    apostrophes = [(m.start(0), m.end(0)) for m in re.finditer("'", input_act_str)]
    if len(apostrophes) == 1:
        ap_start = 0
        ap_end = apostrophes[0][0]
    else:
        ap_start = apostrophes[0][1]
        ap_end = apostrophes[1][0]
    input_act_name = input_act_str[ap_start:ap_end]
    # Unit and location live in the first "(...)" group, comma-separated;
    # strip everything but alphanumerics, '-' and the euro sign.
    input_act_unit_loc = input_act_str[input_act_str.find("("):
                                       input_act_str.find(")") + 1]
    input_act_unit_loc_split = [
        re.sub('[^-A-Za-z0-9-€-]', ' ', el).rstrip().lstrip()
        for el in input_act_unit_loc.split(',')
    ]
    input_act_unit = input_act_unit_loc_split[0]
    input_act_location = input_act_unit_loc_split[1]
    # Add comment when activity cannot be found
    input_act_values_dict = {}
    # Exiobase names its sectors "Manufacture of X"; strip the prefix.
    if 'exiobase' in db_name.lower() and "Manufacture of " in input_act_name:
        input_act_name = input_act_name[15:].capitalize()
    input_act_values_dict['name'] = input_act_name
    input_act_values_dict['unit'] = input_act_unit
    # Silence coco's "not found" warnings; fall back to the raw location.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        location_iso2 = coco.convert(names=input_act_location, to='ISO2')
    if location_iso2 == "not found":
        location_iso2 = input_act_location
    input_act_values_dict['location'] = location_iso2
    input_act_values_dict['amount'] = input_act_amount
    input_act_values_dict['database'] = db_name
    input_act_values_dict['type'] = ACTIVITY_TYPE_DICT['process']  # TODO remove hardcoding
    input_act_values_dict['comment'] = 'TODO could not find this activity'
    return input_act_values_dict
def pull_base(self):
    """
    get the base table for incoming/outgoing movements, and then use it
    to generate the other 2 tables
    """
    LOC = '../d0cz/unhcr_popstats_export_persons_of_concern_2018_06_20.csv'
    ref = pd.read_csv(LOC, skiprows=3)
    # Prefix all columns with 'ref_' and snake_case them.
    ref.columns = [
        'ref_' + v.lower().strip().replace(' ', '_')
        for v in ref.columns.values
    ]
    # only rename weird cols, keep the rest. total:
    """
    ref_year ref_destination ref_origin ref_refugees ref_asylum-seekers
    ref_returned_refugees ref_idps ref_returned_idps ref_stateless_persons
    ref_others_of_concern ref_total_population
    """
    ref.rename(
        {
            'ref_country_/_territory_of_asylum/residence': 'ref_destination',
            'ref_refugees_(incl._refugee-like_situations)': 'ref_refugees',
            'ref_asylum-seekers_(pending_cases)': 'ref_asylum-seekers',
            'ref_internally_displaced_persons_(idps)': 'ref_idps'
        },
        axis=1,
        inplace=True)
    # add in ISOs, uid
    # Build one name->ISO3 map over all origin/destination names, then
    # hand-patch the two names coco cannot resolve.
    trans = {v: coco.convert(names=v, to='ISO3') \
             for v in list(set(list(ref.ref_destination.values) + list(ref.ref_origin.values)))}
    trans['Serbia and Kosovo (S/RES/1244 (1999))'] = 'SRB'
    trans['Tibetan'] = 'CHN'
    ref['ref_dest_iso3'] = ref.apply(lambda x: trans[x['ref_destination']],
                                     axis=1)
    ref['ref_org_iso3'] = ref.apply(lambda x: trans[x['ref_origin']], axis=1)
    # Guard: coco returns a list for ambiguous names -- none allowed here.
    assert (list not in [type(v) for v in trans.values()])
    # clean cols
    tc = [
        'ref_refugees', 'ref_asylum-seekers', 'ref_returned_refugees',
        'ref_idps', 'ref_returned_idps', 'ref_stateless_persons',
        'ref_others_of_concern', 'ref_total_population'
    ]
    for c in tc:
        ref[c] = ref.apply(lambda x: self.clean_col(x[c]), axis=1)
    return ref
def addContinentAndRegionColumn(data_frame):
    """Insert 'Continent' and 'Region' columns derived from 'Country'.

    Each unique country is converted once; np.unique's inverse index maps
    the converted values back onto every row.
    """
    country_names, set_indices = np.unique(data_frame['Country'],
                                           return_inverse=True)
    # BUGFIX: np.full(n, ' ') allocated fixed-width '<U1' arrays, so every
    # assigned name was silently truncated to its first character. Build
    # the arrays from the converted strings instead (numpy then picks a
    # wide-enough dtype).
    continents = np.array([coco.convert(names=name, to='continent')
                           for name in country_names])
    regions = np.array([coco.convert(names=name, to='UNregion')
                        for name in country_names])
    data_frame.insert(5, 'Continent', continents[set_indices])
    data_frame.insert(5, 'Region', regions[set_indices])
    return data_frame
def call_conan(country, to="isocode"):
    """Convert a country name with country_converter, suppressing its noise.

    Returns None when the name cannot be matched. NOTE: silencing is done
    by raising coco's logger level and globally ignoring warnings -- a
    process-wide side effect.
    """
    coco.logging.getLogger().setLevel(logging.CRITICAL)
    # https://stackoverflow.com/a/57986495/10295948
    warnings.filterwarnings("ignore")
    return coco.convert(country, to=to, not_found=None)
def _name_combine(self, flowdata, casedata, populationdata):
    # First, calibrate the country names: load the country fields from
    # the flight-flow data, the population data and the case data, and
    # convert them all to standard short names. (Translated from the
    # original Chinese comment.)
    # flow data: union all row/column country labels across every daily
    # flow CSV from 2019-12-01 through 2020-12-31.
    first_date = datetime.datetime(2019, 12, 1, 0, 0)
    day_len = (datetime.datetime.strptime('2020-12-31', '%Y-%m-%d') -
               first_date).days
    fijt = []
    countries = set([])
    for day in range(day_len):
        dd = (first_date + datetime.timedelta(days=day)).strftime("%Y-%m-%d")
        flushPrint(dd)
        filename = flowdata + dd + '.csv'
        df = pd.read_csv(filename)
        countries1 = set(df['Unnamed: 0'])  # row labels (origin countries)
        countries2 = set(list(df.columns)[1:])  # column labels (destinations)
        countries = countries | countries1 | countries2
    flushPrint(len(countries))
    # NOTE(review): this relies on iterating the unmutated set `countries`
    # in the same order here and in the enumerate below (true in CPython).
    countries_temp = list(coco.convert(names=countries, to="name_short"))
    countries1 = set(countries_temp)
    # raw label -> standardised short name (first mapping wins).
    self.name_map = {}
    for i, cc in enumerate(countries):
        vv = self.name_map.get(cc, '')
        if len(vv) == 0:
            self.name_map[cc] = countries_temp[i]
    print(len(countries), len(countries1))
    # case data
    df = pd.read_csv(casedata)
    countries = set(df['country'][1:])
    countries2 = set(coco.convert(names=countries, to="name_short"))
    print(len(countries), len(countries2))
    # population data
    df = pd.read_csv(populationdata)
    countries = set(df['Name'][1:])
    countries3 = set(coco.convert(names=countries, to="name_short"))
    print(len(countries), len(countries3))
    # Node index over the union of all three sources, skipping names
    # country_converter could not resolve.
    countries = countries1 | countries2 | countries3
    self.nodes = {}
    for country in countries:
        if country != 'not found':
            idx = self.nodes.get(country, len(self.nodes))
            self.nodes[country] = idx
def findLocation(locstr):
    """
    Converts a string of location into a Location object

    :param locstr: location, in any format (str)
    :return: Location object with country, state and city separated (Location object)
    """
    # Uses Location IQ API call to convert string to standardised format
    url = "https://us1.locationiq.com/v1/search.php"
    # BUG: If job is in CA, LocationIQ reads as Canada. Can add countrycodes, but am right now just appending USA to the end is working fine.
    # SECURITY NOTE(review): API key is hard-coded; move it to config/env.
    data = {
        'key': 'f44315769abf5d',
        'q': str(locstr),
        'format': 'json',
        'normalizecity': '1',
        'addressdetails': '1',
        'statecode': '1'
    }
    # Tries 5 times in case code encounters a rate limit
    for i in range(5):
        response = json.loads(requests.get(url, params=data).text)
        # Error responses arrive as a dict rather than a result list.
        # (isinstance replaces the non-idiomatic type(response) == dict.)
        if isinstance(response, dict):
            if "error" in response:
                if response["error"] == "Rate Limited Second":
                    print(
                        "LocationIQ second rate limit exceeded. Trying again in 1 second..."
                    )
                    time.sleep(1)
                    continue
                elif response["error"] == "Rate Limited Minute":
                    print(
                        "LocationIQ minute rate limit exceeded. Trying again in 1 minute..."
                    )
                    time.sleep(60)
                    continue
                elif response["error"] == "Rate Limited Day":
                    print(
                        "LocationIQ day rate limit exceeded. Try again in a day, and find a way to reduce requests in future."
                    )
                    # TODO: change exit calls to Error calls and handle as needed.
                    exit(405)
                else:
                    print("LocationIQ returned unknown error %s." %
                          str(response["error"]))
                    exit(405)
        break
    # Isolates important information to create Location object.
    # NOTE(review): assumes 'city' and 'state_code' are always present in
    # the address -- a KeyError here propagates; confirm against callers.
    address = response[0]["address"]
    city = address["city"]
    state = address["state_code"]
    country = coco.convert(names=address["country_code"], to='ISOnumeric')
    return Location(country, state.upper(), city)
def pull():
    """Load the homepage-dashboard funding/beneficiaries sheet and tidy it.

    Lower-cases and renames the columns (all prefixed 'dash_'), drops the
    spreadsheet's junk columns and the Pacific Region rows, adds ISO3 and
    per-year uid columns, and keeps only the latest row per uid.
    """
    LOC = '../d0cz/180417 Homepage dashboard data.xlsx'
    hist = pd.read_excel(LOC, sheet_name='DATA FUNDING & BENEFICIARIES')
    # Normalise column names.
    hist.columns = hist.columns.str.lower()
    renames = {
        'country': 'country', 'year': 'year', 'uploaded': 'uploaded',
        'quarter': 'quarter', 'status': 'status', 'url': 'url',
        'region': 'region', 'type of crisis': 'type', 'lead': 'lead',
        'co-chair / co-lead': 'co_lead',
        'funding received': 'funding_received',
        'funding required': 'funding_required',
        'funding coverage': 'funding_coverage',
        'count': 'count',
        '# of partners': 'num_partner',
        '# of people reached in total': 'num_reached_tot',
        '# of people targeted in total': 'num_targeted_tot',
        'coverage against target': 'cov_against_target',
        '# of people reached with nfi': 'num_reached_nfi',
        '# of people targeted with nfi': 'num_target_nfi',
        '# of people reached with shelter': 'num_reached_shelt',
        '# of people targeted with shelter': 'num_targ_shelt',
        'data from': 'data_from',
        'source/comments': 'source_comment',
    }
    hist = hist.rename(renames, axis=1)
    hist.columns = ['dash_' + c for c in hist.columns]
    # Junk columns produced by the spreadsheet layout.
    for junk in ('dash_unnamed: 20', 'dash_coverage against target.1',
                 'dash_unnamed: 24', 'dash_coverage against target.2'):
        hist.drop(junk, axis=1, inplace=True)
    # Drop non-country rows.
    hist = hist[hist['dash_country'] != 'Pacific Region']
    # New columns: ISO3 code and country-year uid.
    hist['dash_iso3'] = hist['dash_country'].apply(
        lambda x: coco.convert(names=x, to='ISO3'))
    hist['dash_uid'] = hist['dash_iso3'] + hist['dash_year'].map(str)
    # Deduplicate, keeping the most recently uploaded row per uid.
    hist = hist.drop_duplicates(subset='dash_uid', keep='last')
    return hist
def get_cnt(cnts):
    """extract ISO from country field, replace unknown ISOs, make sure no duplicating bad names"""
    # NOTE(review): a near-identical helper exists elsewhere in this
    # codebase -- consider consolidating.
    repl = {'2014 Revised Strategic Response Plan Sudan': 'Sudan',
            '2016 Humanitarian Response Plan': 'Iraq',
            '2017 Humanitarian Needs Overview Congo': 'Congo',
            'Emergency Humanitarian Response Plan REVISION 2008': 'Kenya',
            'Philippine: Typhoon Haiyan (Yolanda) Strategic Response Plan 2014': 'Philippines',
            'Strategic Response Plan 2014 Occupied Palestian Territory': 'Palestine',
            'Sudanese Red Crescent Society Emergency appeal 2014': 'Sudan',
            'REGIONAL REFUGEE AND MIGRANT RESPONSE PLAN FOR EUROPE.pdf': 'Turkey',
            'REGIONAL REFUGEE AND MIGRANT RESPONSE PLAN FOR EUROPE': 'Turkey',
            'Regional Refugee & Resilience Plan 2015-2016': 'Syria',
            'JRP For Rohingya Humanitarian Crisis.pdf': 'Bangladesh',
            'Regional Refugee and Resilience Plan 2016-2017': 'Syria',
            'Regional Refugee and Resilience Plan 2017-2018': 'Syria'
            }
    normalised = []
    for value in cnts:
        normalised.append(repl[value] if value in repl else value)
    # Sanity checks: no duplicated still-unresolved names, nothing unmatched.
    bad = [v for v in normalised if v in repl]
    assert len(bad) == len(set(bad))
    assert 'not found' not in normalised
    return coco.convert(names=normalised, to='ISO3')