def byCountry_data_cleaning(self, df):
    """Aggregate the raw records into one row per (country, year).

    Args:
        df: DataFrame providing the columns ``country``, ``year``,
            ``suicides_no``, ``population`` and ``gdp_per_capita ($)``.

    Returns:
        The frame produced by ``Sucides_by_Continet().updateData`` for the
        aggregated data, with an extra ``iso_alpha`` column derived from
        ``country`` via ``pc.country_name_to_country_alpha3``, sorted by
        ``year``.
    """
    columns = ["country", "year", "Suicide Number", "Suicide per 100k",
               "GDP per Capita"]
    rows = []
    # A single pass over the groups replaces three separate groupby calls.
    for (country, year), group in df.groupby(["country", "year"]):
        suicides = group["suicides_no"].sum()
        population = group["population"].sum()
        rows.append({
            "country": country,
            "year": year,
            "Suicide Number": suicides,
            # Rate per 100,000 inhabitants. The previous factor of
            # 1,000,000 yielded a per-million figure under this label.
            "Suicide per 100k": (suicides * 100000) / population,
            # Taken from the first row of the group.
            "GDP per Capita": group["gdp_per_capita ($)"].iloc[0],
        })
    new_df = pd.DataFrame(rows, columns=columns)

    sucides_by_continet = Sucides_by_Continet()
    updated_df = sucides_by_continet.updateData(new_df)
    updated_df['iso_alpha'] = updated_df['country'].apply(
        lambda name: pc.country_name_to_country_alpha3(
            name, cn_name_format="default"))
    return updated_df.sort_values(by=['year'])
def iso2_to_iso3(x):
    """Convert an alpha-2 country code to its alpha-3 code.

    The code is first resolved to a country name and the name is then
    mapped to alpha-3, both through ``pc``.

    Args:
        x: Alpha-2 country code.

    Returns:
        The alpha-3 code, or ``''`` when either conversion step fails.
    """
    try:
        name = pc.country_alpha2_to_country_name(x, cn_name_format="default")
        return pc.country_name_to_country_alpha3(
            name, cn_name_format="default")
    except Exception:
        # Best-effort conversion: any lookup failure maps to ''. Unlike a
        # bare ``except``, this lets KeyboardInterrupt/SystemExit through.
        return ''
def test_country_name_to_country_alpha3(self):
    """Check name-to-alpha-3 conversion for a handful of known inputs."""
    expected_codes = (
        ('USA', 'USA'),
        ('United States', 'USA'),
        ('South Korea', 'KOR'),
        ('Russian Federation', 'RUS'),
    )
    for country_name, alpha3 in expected_codes:
        converted = country_name_to_country_alpha3(country_name)
        # The result must be truthy and match the expected code.
        assert converted
        assert converted == alpha3
def country_alpha3(cn_name):
    """Return the alpha-3 code for a country name.

    Args:
        cn_name: Country name passed to
            ``pc.country_name_to_country_alpha3``.

    Returns:
        The alpha-3 code, or the placeholder ``"UKN"`` when the lookup
        fails.
    """
    try:
        return pc.country_name_to_country_alpha3(cn_name)
    except Exception:
        # Best-effort lookup; unlike a bare ``except`` this does not
        # swallow KeyboardInterrupt/SystemExit.
        return "UKN"
def suicidesbygeo(self, df):
    """Build an animated geo scatter of suicides per country and year.

    Note that an ``iso_alpha`` column is written onto the ``df`` that is
    passed in, so the caller's frame is modified.

    Args:
        df: DataFrame with the columns ``country``, ``year``,
            ``Continent`` and ``suicides_no``.

    Returns:
        The figure created by ``px.scatter_geo``, sized 1080x600.
    """
    def _alpha3(row):
        return pc.country_name_to_country_alpha3(
            row.country, cn_name_format="default")

    df['iso_alpha'] = df.apply(_alpha3, axis=1)
    ordered = df.sort_values(by=['year'])
    fig = px.scatter_geo(
        ordered,
        locations="iso_alpha",
        color="Continent",
        hover_name="country",
        size="suicides_no",
        animation_frame="year",
        title="Continent wide suicides with each year",
    )
    fig.update_layout(width=1080, height=600)
    return fig
def country_name_to_country_code(row):
    """Map ``row.geo`` (a country name) to a three-letter code.

    Kosovo is special-cased to ``'KSV'``. Any other name is converted with
    ``pycountry_convert``; a ``KeyError`` from that lookup yields ``""``.
    """
    name = row.geo
    if name != 'Kosovo':
        try:
            return pycountry_convert.country_name_to_country_alpha3(name)
        except KeyError:
            return ""
    return 'KSV'
def get_customers_count(self, country=None):
    """Request the customer count, optionally restricted to one country.

    Args:
        country: Country name, or ``None`` for no country filter. A name
            is converted to its alpha-3 code before it is sent.

    Returns:
        Whatever ``.get()`` returns for the task submitted with
        ``apply_async`` on the ``'customers'`` queue.
    """
    if country is None:
        country_code = None
    else:
        country_code = pycountry_convert.country_name_to_country_alpha3(
            country)
    # The task receives one positional argument: the keyword dict.
    payload = {'country': country_code}
    pending = customers_service.get_customers_count.apply_async(
        args=(payload,), queue='customers')
    return pending.get()
def load_world_df():
    """Load the ``naturalearth_lowres`` dataset indexed by ``iso_a3``.

    The ``iso_a3`` value for France, Norway and Somaliland is overwritten
    with the code returned by ``country_name_to_country_alpha3``.

    Returns:
        The frame indexed by ``iso_a3``, without the ``gdp_md_est``,
        ``geometry`` and ``pop_est`` columns, and with ``name`` renamed to
        ``country``.
    """
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
    for name in ('France', 'Norway', 'Somaliland'):
        matches = world['name'] == name
        world.loc[matches, 'iso_a3'] = country_name_to_country_alpha3(name)
    trimmed = world.set_index('iso_a3').drop(
        columns=['gdp_md_est', 'geometry', 'pop_est'])
    return trimmed.rename(columns={'name': 'country'})
def get_iso(country):
    """Return the alpha-3 code for ``country``, memoised in ``isos``.

    On a cache miss the exact-name lookup is tried first, then a fuzzy
    search through ``pycountry``. If both fail, ``''`` is cached so the
    same name is not looked up again.

    Args:
        country: Country name; also the key in the module-level ``isos``
            dict.

    Returns:
        The cached alpha-3 code, or ``''`` if no code was found.
    """
    # ``isos`` is only mutated, never rebound, so no ``global`` is needed.
    if country not in isos:
        try:
            isos[country] = pyCountry.country_name_to_country_alpha3(
                country, cn_name_format="default")
        except Exception:
            # Exact lookup failed; fall back to fuzzy matching.
            try:
                isos[country] = pycountry.countries.search_fuzzy(
                    country)[0].alpha_3
            except Exception:
                isos[country] = ''
    return isos[country]
def get_dataframe():
    """Load the daily sentiment CSV and add three derived columns.

    Added columns:
        ``timestamp``: ``Date`` (``YYYY-MM-DD``) converted with
            ``time.mktime``, i.e. interpreted in local time.
        ``Country Name``: the alpha-3 code obtained from the alpha-2 code
            in ``Country`` by way of the country name.
        ``Ratio``: ``Positive`` divided by ``Negative``.

    Returns:
        The augmented DataFrame.
    """
    def _epoch_seconds(day):
        parsed = datetime.datetime.strptime(day, "%Y-%m-%d")
        return time.mktime(parsed.timetuple())

    def _alpha3_from_alpha2(code):
        return country_name_to_country_alpha3(
            country_alpha2_to_country_name(code))

    frame = pd.read_csv('final_op_sentiments_daily.csv')
    frame['timestamp'] = frame.Date.apply(_epoch_seconds)
    frame['Country Name'] = frame.Country.apply(_alpha3_from_alpha2)
    frame['Ratio'] = frame.Positive / frame.Negative
    return frame
def country_converter(self, countries):
    """Normalise a mix of country identifiers to unique alpha-3 codes.

    Each entry is interpreted by its length: two characters are treated as
    an alpha-2 code, three characters are taken to be an alpha-3 code and
    kept as-is, and anything else is looked up as a country name.

    Args:
        countries: Iterable of strings (alpha-2, alpha-3 or country name).

    Returns:
        List of alpha-3 codes in first-seen order, without duplicates.

    Raises:
        KeyError: If an alpha-2 code is missing from the alpha-2 to
            alpha-3 mapping.
    """
    alpha2_to_alpha3 = None  # built lazily, at most once per call
    unique_codes = {}  # dict keys give ordered, O(1) de-duplication
    for country in countries:
        if len(country) == 2:
            if alpha2_to_alpha3 is None:
                alpha2_to_alpha3 = (
                    pycountry_convert.map_country_alpha2_to_country_alpha3())
            alpha3 = alpha2_to_alpha3[country]
        elif len(country) == 3:
            alpha3 = country
        else:
            alpha3 = pycountry_convert.country_name_to_country_alpha3(country)
        unique_codes.setdefault(alpha3, None)
    return list(unique_codes)
def getCode(name):
    """Return the alpha-3 code for ``name``, resolving a few short names.

    A handful of informal names are first mapped to the longer names used
    for the lookup. If the lookup raises ``KeyError``, the error is
    printed and ``None`` is returned.
    """
    aliases = {
        'Britain': 'United Kingdom',
        'Trinidad-Tobago': 'Trinidad and Tobago',
        'Sao Tome': 'São Tomé and Príncipe',
        'Bosnia': 'Bosnia and Herzegovina',
        'UAE': 'United Arab Emirates',
    }
    canonical = aliases.get(name, name)
    try:
        return pc.country_name_to_country_alpha3(canonical)
    except KeyError as e:
        print(e)
    return None
def kratice_drzav():
    """Build a dict of international country codes (alpha-3).

    Keys are the Slovenian country names taken from the values of
    ``prevod_drzav()``; each code is looked up from the corresponding
    English name (the key of ``prevod_drzav()``).

    Returns:
        Dict mapping Slovenian country name to its code, with
        ``'Unknown'`` for names whose lookup fails.
    """
    sl_kratic = {}
    for ang, slo in prevod_drzav().items():
        try:
            cn_a3_code = country_name_to_country_alpha3(ang)
        except Exception:
            # Best-effort: unknown names get a placeholder. Unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit through.
            cn_a3_code = 'Unknown'
        sl_kratic[slo] = cn_a3_code
    # Also add Yugoslavia, which is not in the 'pycountry_convert' module.
    # NOTE(review): 'YU' has two letters while the other values are
    # alpha-3 codes; confirm whether that is intended.
    sl_kratic['Jugoslavija'] = 'YU'
    return sl_kratic
def getCode(country):
    """Return the alpha-3 code for the country resolved from ``country``.

    ``getCountry(country)`` is evaluated once. Some of the names it yields
    are translated to the spelling the converter is given, and three names
    are deliberately mapped to ``''``.

    Args:
        country: Value handed to ``getCountry``.

    Returns:
        The alpha-3 code, or ``''`` for 'Palestinian Territory', 'Kosovo'
        and 'Netherlands Antilles'.
    """
    # Resolve once instead of re-calling getCountry() for every comparison.
    resolved = getCountry(country)

    without_code = ('Palestinian Territory', 'Kosovo', 'Netherlands Antilles')
    if resolved in without_code:
        return ''

    renamed = {
        'Cocos Islands': 'Cocos (Keeling) Islands',
        'Vatican': 'Holy See (Vatican City State)',
        'Reunion': 'Réunion',
        'Saint Helena': 'Saint Helena, Ascension and Tristan da Cunha',
        'U.S. Virgin Islands': 'Virgin Islands, U.S.',
    }
    return pc.country_name_to_country_alpha3(renamed.get(resolved, resolved))
temp.loc[temp['Country name'] == 'Taiwan Province of China', 'Country name'] = 'Taiwan, Province of China' temp.loc[temp['Country name'] == 'Hong Kong S.A.R. of China', 'Country name'] = 'Hong Kong' temp.loc[temp['Country name'] == 'Congo (Brazzaville)', 'Country name'] = 'Congo' temp.loc[temp['Country name'] == 'Palestinian Territories', 'Country name'] = 'Palestine, State of' temp.drop(index=temp[temp['Country name'] == 'Kosovo'].index, inplace=True) # Kosovo Code agreed on not to use by ISO 3166 temp.drop(index=temp[temp['Country name'] == 'North Cyprus'].index, inplace=True) # Not part of the ISO 3166 standard temp['iso_alpha'] = temp['Country name'].apply( lambda x: pc.country_name_to_country_alpha3(x, )) temp2 = temp.sort_values(by=['Social support'], ascending=False)[:20] fig = px.choropleth( temp, locations='iso_alpha', color='Social support', hover_name='Country name', color_continuous_scale=px.colors.diverging.RdYlGn, ) fig.update_layout(showlegend=False, template=draft_template, annotations=[ dict( templateitemname="draft watermark", text="www.blackandwhitedata.com", )
import os

import matplotlib.pyplot as plt
import pandas as pd
import geopandas
import mapclassify
from pycountry_convert import country_name_to_country_alpha3

# Script: draw a world choropleth of the salary figures in ``data.csv``.
datafile = 'data.csv'
shapefile = '../ne_10m_admin_0_countries_lakes.shp'
cols = ['Country', 'Salary (USD)']

df = pd.read_csv(datafile, names=cols)
# Country names become alpha-3 codes so they can be joined on ADM0_A3.
df['Country'] = [country_name_to_country_alpha3(x) for x in df['Country']]

gdf = geopandas.read_file(shapefile)[['ADM0_A3', 'geometry']].to_crs('+proj=robin')
# Left join keeps countries that have no salary row, so they can be drawn
# hatched below instead of disappearing from the map.
df = gdf.merge(df, left_on='ADM0_A3', right_on='Country', how='left')

missing = df.isna().any(axis=1)
ax = df[~missing].plot(column=cols[1],
                       cmap='Blues',
                       figsize=(16, 10),
                       scheme='equal_interval',
                       k=5,
                       legend=True)
# Hatch only the countries without data. Plotting the whole frame here
# painted the hatch layer over every coloured country.
if missing.any():
    df[missing].plot(ax=ax, color='#fafafa', hatch='///')
if 'Faeroe Islands' == each.lstrip(): each = 'Faroe Islands' if 'The Gambia' == each.lstrip(): continue if 'Falkland Islands (Islas Malvinas)' == each.lstrip(): continue if 'Netherlands Antilles' == each.lstrip(): continue if 'Aruba, the Netherlands with respect to' == each.lstrip(): continue if 'British Overseas Territory of Saint Helena, Ascension and Tristan da Cunha' == each.lstrip( ): continue if 'Wallis and Futuna Islands' == each.lstrip(): continue code = pc.country_name_to_country_alpha3(each.lstrip()) if code not in codeList: codeList.append(code) countriesCode.append(code) row = {TYPE: item[2], date: time, Signatories: countriesCode} result[item[0]] = row matrix = [["code1", "code2"]] for year in range(2000, 2023): matrix[0].append(str(year)) for code1 in codeList: for code2 in codeList: if code1 == code2: continue row = [code1, code2] for year in range(2000, 2023):
def get_alpha3(col):
    """Return the alpha-3 code for the country name ``col``.

    Returns:
        The alpha-3 code, or ``'Unknown'`` when the lookup fails.
    """
    try:
        return country_name_to_country_alpha3(col)
    except Exception:
        # Best-effort lookup; unlike a bare ``except`` this does not
        # swallow KeyboardInterrupt/SystemExit.
        return 'Unknown'
def country_convert(x):
    """Return the alpha-3 code for the country name ``x``.

    Thin wrapper around ``country_name_to_country_alpha3``; any exception
    raised by that call propagates unchanged.
    """
    alpha3_code = country_name_to_country_alpha3(x)
    return alpha3_code
def try_convert(country_name):
    """Return the alpha-3 code for ``country_name``, or ``None`` on failure.

    Args:
        country_name: Country name to convert.

    Returns:
        The alpha-3 code, or ``None`` when the lookup raises.
    """
    try:
        return country_name_to_country_alpha3(country_name)
    except Exception:
        # Best-effort lookup; unlike a bare ``except`` this does not
        # swallow KeyboardInterrupt/SystemExit.
        return None
def update_graph(country_code, strictness, clicks):
    """Build the map figure, caption and toggle label for a destination.

    Reads the module-level ``latlon``, ``flights_data``,
    ``inf_choropleth_recent_data``, ``a2toa3``, ``risk_factors`` and
    ``text`` objects.

    Args:
        country_code: Alpha-2 code of the destination country.
        strictness: One of ``'low'``, ``'med'`` or ``'high'``. With
            ``'high'``, a source country is only drawn when its alpha-3
            code is contained in the first ``sources_y`` value of the
            destination's ``risk_factors`` rows.
        clicks: Click count of the toggle. An even count shows flight
            capacity, an odd count shows the positive rate. ``None`` is
            treated as 0.

    Returns:
        Tuple ``(fig, markdown, label)``.
    """
    country = country_alpha2_to_country_name(country_code)
    dest_row = latlon.loc[latlon['name'] == country]
    dest_lat = dest_row['latitude'].iloc[0]
    dest_lon = dest_row['longitude'].iloc[0]
    dest_flights = flights_data[flights_data['dest_airport_country'] == country]

    # Decide the toggle label up front so it is defined on every path.
    # It used to be unbound when there were no flights, which made the
    # final return raise.
    showing_capacity = (clicks or 0) % 2 == 0
    label = "View Positive Rate" if showing_capacity else "View Flight Capacity"

    if dest_flights.size == 0:
        fig = px.scatter_geo(lat=[dest_lat],
                             lon=[dest_lon],
                             projection='natural earth')
        markdown = dcc.Markdown(
            "#### NO DATA AVAILABLE FOR THE SELECTED COUNTRY")
        fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
        return fig, markdown, label

    if showing_capacity:
        fig = px.choropleth(dest_flights,
                            locationmode="ISO-3",
                            locations='CC',
                            color='flight_capacity',
                            color_continuous_scale="spectral",
                            template='seaborn',
                            projection='natural earth')
    else:
        fig = px.choropleth(inf_choropleth_recent_data,
                            locationmode="ISO-3",
                            locations='iso_code',
                            color='positive_rate',
                            color_continuous_scale="reds",
                            template='seaborn',
                            projection='natural earth')
    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))

    country_3 = a2toa3[country_code]
    country_cr = risk_factors[risk_factors['iso_code'] == country_3]
    for val in dest_flights.itertuples():
        # itertuples() puts the index at position 0, so this is the first
        # data column.
        source = val[1]
        if strictness == 'high' and country_name_to_country_alpha3(
                source) not in country_cr['sources_y'].iloc[0]:
            continue
        try:
            source_row = latlon.loc[latlon['name'] == source]
            lat = source_row['latitude'].iloc[0]
            lon = source_row['longitude'].iloc[0]
        except IndexError:
            # No coordinates known for this source country: skip its line.
            continue
        fig = fig.add_scattergeo(lat=[lat, dest_lat],
                                 lon=[lon, dest_lon],
                                 line=dict(width=1, color='#1F1F1F'),
                                 mode='lines+text',
                                 text="✈️",
                                 showlegend=False)

    strictness_level = {
        'low': "Lowest",
        'med': "Moderate",
        'high': "Highest"
    }[strictness]
    markdown = dcc.Markdown(text.format(country, strictness_level))
    return fig, markdown, label
def new_create_articles_dfs(path_of_files):
    """Load article JSON files into an articles and an affiliations frame.

    Every ``*.json`` file directly inside ``path_of_files`` is read. Its
    ``authors`` entry is removed from the article record and expanded into
    one affiliation row per (author, affiliation) pair that has a non-null
    ``original_name``. When ``normalized_name`` is present it replaces the
    affiliation name, and ``country`` / ``iso_a3`` are filled in.

    Args:
        path_of_files: Directory containing the article JSON files.

    Returns:
        Tuple ``(articles, affiliations)``: ``articles`` has one row per
        file; ``affiliations`` is indexed by
        ``(journal, article, author, affiliation)`` and sorted by that
        index. Both are empty, with ``affiliations`` still carrying its
        index levels, when nothing was found.

    Raises:
        Whatever ``country_name_to_country_alpha3`` raises for a country
        value it does not recognise.
    """
    index_columns = ['journal', 'article', 'author', 'affiliation']
    affiliation_columns = index_columns + [
        'original_name', 'year', 'country', 'iso_a3'
    ]
    list_of_articles = []
    affiliations_list = []

    for filename in os.listdir(path_of_files):
        if not filename.endswith('.json'):
            continue
        with open(os.path.join(path_of_files, filename), 'r',
                  encoding='utf-8') as article_file:
            article_dict = json.load(article_file)
        authors = article_dict.pop('authors')
        list_of_articles.append(article_dict)

        for author in authors:
            if author['affiliation'] is None:
                continue
            for affiliation in author['affiliation']:
                # Skip null entries and entries without a usable name.
                if affiliation is None or affiliation['original_name'] is None:
                    continue
                curr_aff = {
                    'journal': article_dict['journal_title'],
                    'article': os.path.splitext(filename)[0],
                    'author': f"{author['given']} {author['family']}",
                    'affiliation': affiliation['original_name'],
                    'original_name': affiliation['original_name'],
                    'year': article_dict['date'],
                    'country': None,
                    'iso_a3': None
                }
                if affiliation['normalized_name'] is not None:
                    curr_aff['affiliation'] = affiliation['normalized_name']
                    curr_aff['country'] = affiliation['country']
                    curr_aff['iso_a3'] = country_name_to_country_alpha3(
                        affiliation['country'])
                affiliations_list.append(curr_aff)

    # Explicit columns keep set_index() working when no affiliation rows
    # were collected; without them it raised KeyError.
    affiliations = pd.DataFrame(affiliations_list, columns=affiliation_columns)
    return (pd.DataFrame(list_of_articles),
            affiliations.set_index(index_columns).sort_index())
indexNames = df[ df['country_code'] == '09' ].index df.drop(indexNames , inplace=True) indexNames = df[ df['country_code'] == '01' ].index df.drop(indexNames , inplace=True) indexNames = df[ df['country_code'] == '05' ].index df.drop(indexNames , inplace=True) indexNames = df[ df['country_code'] == '12' ].index df.drop(indexNames , inplace=True) indexNames = df[ df['country_code'] == '06' ].index df.drop(indexNames , inplace=True) indexNames = df[ df['country_code'] == '11' ].index df.drop(indexNames , inplace=True) df['country_code'] = df.country_code.apply(lambda x: country_name_to_country_alpha3(country_alpha2_to_country_name(x))) df['death rate']=(df['dead']/df['confirmed'])*100 df['recovery rate']=(df['recovered']/df['confirmed'])*100 st.sidebar.title("Filter country ") temp = df.to_dict('list') temp['location'] = list(set(temp['location'])) data = st.sidebar.selectbox("Select Country", temp['location']) st.subheader("NUMBER OF COVID 19 CASES AROUND THE WORLD") fig1 = go.Figure(data=go.Choropleth( locations = df['country_code'], z = df['confirmed'], text = df['location'], colorscale = 'Reds', autocolorscale=False,
def fetch_data(self, type_data):
    """Download one time-series CSV and store it in long format.

    The CSV at ``self.URL[type_data]`` is grouped by ``Country/Region``
    and reshaped to one row per (country, date). The resulting frame is
    stored in ``self.dfs[type_data]``. Any failure is printed and
    otherwise ignored, so ``self.dfs`` is left untouched in that case.

    Args:
        type_data: Key into ``self.URL``; also used as the name of the
            value column and as the key in ``self.dfs``.

    Returns:
        None.
    """
    # Entries that are left out entirely.
    skipped = {"Diamond Princess", "Holy See", "MS Zaandam"}
    # Source names needing a manual mapping:
    # source name -> (display name, alpha-2, alpha-3).
    overrides = {
        'Burma': ('Myanmar', 'MM', 'MMR'),
        "Congo (Brazzaville)": ("Republic of the Congo", 'CG', 'COG'),
        "Congo (Kinshasa)": ("Democratic Republic of the Congo", 'CD', 'COD'),
        "Cote d'Ivoire": ("Cote d'Ivoire", 'CI', 'CIV'),
        'Korea, South': ('South Korea', 'KR', 'KOR'),
        'Kosovo': ('Kosovo', 'XK', 'XKX'),
        'Laos': ('Laos', 'LA', 'LAO'),
        'Taiwan*': ('Taiwan', 'TW', 'TWN'),
        'US': ('United States of America', 'US', 'USA'),
        # This entry used to leave the display name unset, so Vietnam rows
        # carried the previous country's name.
        'Vietnam': ('Vietnam', 'VN', 'VNM'),
        'West Bank and Gaza': ('Palestine', 'PS', 'PSE'),
    }

    print("Fetching data for", type_data)
    try:
        df = pd.read_csv(self.URL[type_data])
        # Remove unneeded Lat and Long columns
        df = df.drop(['Lat', "Long"], axis=1)
        # Group by country (some countries are divided by region). Only
        # numeric columns are summed, so any remaining text column does
        # not end up among the per-date values.
        df = df.groupby(['Country/Region']).sum(numeric_only=True)

        df_data = []
        for index, rows in df.iterrows():
            if index in skipped:
                continue
            if index in overrides:
                country_name, iso_alpha2, iso_alpha3 = overrides[index]
            else:
                country_name = index
                iso_alpha2 = country_name_to_country_alpha2(
                    index, cn_name_format="default")
                iso_alpha3 = country_name_to_country_alpha3(
                    index, cn_name_format="default")

            # The continent is the same for every date of a country, so
            # resolve it once. Countries without a continent mapping are
            # skipped, as before.
            try:
                continent = self.CONTINENTS[country_alpha2_to_continent_code(
                    iso_alpha2)]
            except KeyError:
                continue

            for date, num in rows.items():
                df_data.append({
                    'country': country_name,
                    'continent': continent,
                    'iso_alpha2': iso_alpha2,
                    'iso_alpha3': iso_alpha3,
                    'date': date,
                    type_data: num
                })

        df_new = pd.DataFrame(df_data)
        # Normalise the m/d/yy column labels to ISO date strings.
        df_new['date'] = pd.to_datetime(df_new['date'], format='%m/%d/%y')
        df_new['date'] = df_new['date'].astype(str)
        self.dfs[type_data] = df_new
    except Exception as e:
        print("Fetch failed")
        print("Error message:")
        print(e)
    return None
df2 = df.set_index('Country name') temp = pd.DataFrame(df2['Perceptions of corruption']).reset_index() #ADAPTING TO THE ISO 3166 STANDARD temp.loc[temp['Country name'] == 'Taiwan Province of China', 'Country name'] = 'Taiwan, Province of China' temp.loc[temp['Country name'] == 'Hong Kong S.A.R. of China', 'Country name'] = 'Hong Kong' temp.loc[temp['Country name'] == 'Congo (Brazzaville)','Country name'] = 'Congo' temp.loc[temp['Country name'] == 'Palestinian Territories','Country name'] = 'Palestine, State of' temp.drop(index=temp[temp['Country name'] == 'Kosovo'].index, inplace=True) # Kosovo Code agreed on not to use by ISO 3166 temp.drop(index=temp[temp['Country name'] == 'North Cyprus'].index, inplace=True) # Not part of the ISO 3166 standard temp['iso_alpha'] = temp['Country name'].apply(lambda x:pc.country_name_to_country_alpha3(x,)) temp2 = temp.sort_values(by=['Perceptions of corruption'], ascending=False)[:20] fig = px.choropleth(temp, locations='iso_alpha', color='Perceptions of corruption', hover_name='Country name', color_continuous_scale=px.colors.diverging.Picnic, ) fig.update_layout( showlegend=False, template=draft_template, annotations=[ dict( templateitemname="draft watermark", text="www.blackandwhitedata.com", ) ],
# ***************************************************
# ****** Visualization ******
# ***************************************************
# Number of rows per country, shown as a world choropleth.
count_country = pd.DataFrame(data_table.Country.value_counts())
count_country = count_country.reset_index()
count_country.columns = ['country', 'count']
# Assign the result back: an in-place replace on a column selection is a
# chained assignment and is not guaranteed to modify the frame.
count_country['country'] = count_country['country'].replace({
    'EIRE': 'Ireland',
    'Channel Islands': 'United Kingdom',
    'RSA': 'South Africa'
})
count_country = count_country.loc[
    ~count_country.country.isin(['Unspecified', 'European Community']), :]
# The renames above can give one country several rows (e.g. 'Channel
# Islands' folded into 'United Kingdom'); merge them so every location
# appears exactly once on the map.
count_country = count_country.groupby(
    'country', as_index=False, sort=False)['count'].sum()
count_country['country_alpha_3'] = count_country.country.apply(
    lambda x: country_name_to_country_alpha3(x))
fig = px.choropleth(count_country,
                    locations='country_alpha_3',
                    color='count',
                    hover_name='country',
                    color_continuous_scale=px.colors.sequential.Plasma)
plot(fig)

# ***************************************************
# ****** Data Preparation For Modeling ******
# ***************************************************
# -------------------------------
# 1. Skewness check
fig = make_subplots(rows=1, cols=3)
fig.add_trace(go.Box(y=customers_data.recency, name='recency'), row=1, col=1)