Example no. 1
 def spreading(self):
     # example
     while self.run:
         if random.randint(1, 30) == 1:
             # global spread: infect a country that is still healthy
             if len(self.healthy) != 0:
                 newInfected = random.choice(self.healthy)
                 self.infect.plotAvion(newInfected)
                 if newInfected.name not in self.succeptible:
                     self.succeptible.append(newInfected.name)
                 plt.title(
                     coco.convert(names=newInfected.name, to="short_name") +
                     " IS NOW INFECTED",
                     fontsize=50)
                 self.update(newInfected)
         else:
             # local spread: pick a country already on the susceptible list
             if len(self.succeptible) != 0:
                 newInfected = random.choice(self.succeptible)
                 plt.title(
                     coco.convert(names=newInfected, to="short_name") +
                     " IS NOW INFECTED",
                     fontsize=50)
                 self.update(self.finde(newInfected))
         if len(self.succeptible) == 0 and len(self.healthy) == 0:
             self.infect.end()
             print("Every contry are infected")
             break
Example no. 2
    def get_sentiment_per_country_per_day(self):
        select_sql = "SELECT u.country_code, avg(t.sentiment_pol) AS sentiment, AVG(t.sentiment_pol*t.sentiment_pol) - AVG(t.sentiment_pol)*AVG(t.sentiment_pol) AS variance, DATE(t.created_at) AS created_at from tweet t " \
                     "JOIN user u on u.id = t.user_id " \
                     "WHERE u.country_code IN ('us','pl', 'gb', 'it') AND t.created_at > '2020-03-07' AND t.created_at <> '2020-03-17' AND t.created_at <> '2020-04-29' " \
                     "GROUP BY u.country_code, DATE(t.created_at);"

        self.cur.execute(select_sql)
        data = self.cur.fetchall()
        result = dict()

        for row in data:
            cc = coco.convert(names=row['country_code'], to='ISO3')
            if cc not in result.keys():
                cn = coco.convert(cc, to='name_short')
                result[cc] = dict()
                result[cc]['dates'] = []
                result[cc]['sentiment'] = []
                result[cc]['stdev'] = []
                result[cc]['name'] = cn

            result[cc]['dates'].append(row['created_at'])
            sentiment = row['sentiment'] if row['sentiment'] is not None else 0
            stdev = sqrt(row['variance']) if row['variance'] is not None else 0
            result[cc]['sentiment'].append(sentiment)
            result[cc]['stdev'].append(stdev)
        return result
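The SQL above computes the per-group variance with the identity Var(X) = E[X^2] - E[X]^2, and the Python code then takes the square root to obtain the standard deviation. A minimal sanity check of that identity with made-up sentiment values (not part of the original code):

    from math import sqrt

    xs = [0.2, -0.1, 0.4, 0.0]  # hypothetical sentiment polarities
    mean = sum(xs) / len(xs)
    mean_sq = sum(x * x for x in xs) / len(xs)
    variance = mean_sq - mean * mean  # same identity as in the SQL
    stdev = sqrt(variance)
    # matches the directly computed (population) variance
    assert abs(variance - sum((x - mean) ** 2 for x in xs) / len(xs)) < 1e-12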
Example no. 3
    def get_tweets_per_country(self,
                               from_date=None,
                               to_date=None,
                               divided_by_population=False):
        from_date, to_date = self.parse_from_to_date(from_date, to_date)

        select_sql = "SELECT u.country_code, count(*) AS number FROM tweet t " \
                     "JOIN user u on u.id = t.user_id " \
                     "WHERE u.country_code IS NOT NULL AND u.country_code <> 'und' AND t.created_at >= '" + from_date + "' AND t.created_at <= '" + to_date + "' " \
                                                                                                                                                              "GROUP BY u.country_code"
        self.cur.execute(select_sql)
        data = self.cur.fetchall()
        result = dict()
        result['country_code'] = []
        result['country_name'] = []
        result['number'] = []
        for row in data:
            cc = coco.convert(names=row['country_code'], to='ISO3')
            cn = coco.convert(cc, to='name_short')
            result['country_code'].append(cc)
            result['country_name'].append(cn)
            result['number'].append(row['number'])
        if divided_by_population:
            population = self.get_countries_populations(result['country_name'],
                                                        all=True)
            for i in range(len(result['country_name'])):
                name = result['country_name'][i]
                if name in population.keys():
                    if population[name] is not None:
                        result['number'][i] /= population[name]
                    else:
                        result['number'][i] = 0
                else:
                    result['number'][i] = 0
        return result
Example no. 4
def ratio(df):
    # group the data by country (date is not considered)
    df_ratio = df.reset_index().groupby('country', as_index=False).sum()

    pop = pd.read_excel('data/population.xlsx')  #data for country's population

    pop.Country = coco.convert(names=pop.Country.to_list(),
                               to='short_name')  #convert country names
    pop.columns = ['country', 'population']  #rename columns

    country_list = coco.convert(names=df_ratio.country.to_list(),
                                to='short_name',
                                not_found=None)  #convert country names
    df_ratio['country'] = country_list

    df_ratio = pd.merge(df_ratio, pop, how='left',
                        on='country')  #merge with main df

    df_ratio['case_ratio'] = df_ratio.new_case / df_ratio.population  # case per capita ratio
    df_ratio['death_ratio'] = df_ratio.new_death / df_ratio.population  # death per capita ratio

    try:
        dest = 'flags_ratio'
        copy_flag(dest, country_list, ship=False)
    except Exception:
        print("Copying flags to Tableau folder unsuccessful")

    return df_ratio
Example no. 5
def copy_flag(dest, country_list, ship=True):
    import os, shutil
    import config

    path = 'tableau/flags'
    dest = config.dir + dest

    files = os.listdir(path)
    shutil.rmtree(dest, ignore_errors=True)  #remove existing flag folder
    os.mkdir(dest)  #remake a new folder

    for file in files:
        if ship:
            country_name = coco.convert(
                names=file.split('.')[0], to='short_name',
                not_found=None)  #convert country names to standard name
        else:
            country_name = coco.convert(names=file.split('.')[0],
                                        to='short_name')

        if country_name in country_list:  #Copy the country flags if they are in the dataset
            src = os.path.join(path, file)
            shutil.copy(src, dest)
            if country_name == 'DR Congo':  #the capitalized R in DR is problematic for Tableau
                os.rename(os.path.join(dest, file),
                          os.path.join(dest, 'Dr Congo.png'))
            elif country_name == 'MS Zaandam':  #the capitalized S in MS is problematic for Tableau
                os.rename(os.path.join(dest, file),
                          os.path.join(dest, 'Ms Zaandam.png'))
            else:
                os.rename(os.path.join(dest, file),
                          os.path.join(dest, country_name + '.png'))

    return
Example no. 6
def country(df):
    df_country = df.reset_index().groupby(
        ['date', 'country'],
        as_index=False).sum()  #group the data by date (daily) and then country

    country_list = coco.convert(
        names=df_country.country.to_list(), to='short_name',
        not_found=None)  #convert country names to standard
    df_country['country'] = country_list
    df_country['continent'] = coco.convert(names=df_country.country.to_list(),
                                           to='continent')

    #this dataframe is for creating a ranking column based on total confirmed cases, which is helpful in Tableau
    df_country_sort = df_country.groupby(
        'country', as_index=False).sum().sort_values('new_case',
                                                     ascending=False)
    df_country_sort.index = np.arange(1, len(df_country_sort) + 1)
    df_country_sort = df_country_sort.reset_index().loc[:, 'index':'country']

    df_country = pd.merge(df_country,
                          df_country_sort,
                          how='left',
                          on='country')  #merge with main dataframe

    try:
        dest = 'flags_country'
        copy_flag(dest, country_list)
    except Exception:
        print("Copying flags to Tableau folder unsuccessful")

    return df_country
Example no. 7
    async def flag(self, ctx, *country: str):
        if len(country) == 0:
            country = random.choice(quiz_country_list)
            result4 = coco.convert(names=country, to="ISO2")
            url = f"https://flagcdn.com/w320/{result4.lower()}.jpg"
            e = discord.Embed(title=f"Flag of {country.title()}")
            e.set_image(url=url)
            await ctx.send(embed=e)
        else:
            country = " ".join(country)
            country = await country_filter(country, ctx)
            if country is None:
                return
            country = country["name"]
            result4 = coco.convert(names=country, to="ISO2")

            url = f"https://flagcdn.com/w320/{result4.lower()}.jpg"
            e = discord.Embed(title=f"Flag of {country.title()}")
            e.set_image(url=url)
            try:
                await ctx.send(embed=e)
            except Exception:
                embed = discord.Embed(title="Error",
                                      description=":x: Country not found")
                await ctx.send(embed=embed)
Example no. 8
def add_country_code(data, ctry_col):
    """
    Append two new columns to the data containing each entry's country codes.

    :param data: filepath (.csv or .xlsx extension) or dataframe.
    :type data: str, DataFrame, geopandas.GeoDataFrame.
    :param ctry_col: name of the country column.
    :type ctry_col: str.
    :return: the modified dataframe with the new columns 'ISO2' and 'ISO3' for two-letter and three-letter country
             codes respectively.
    :rtype: DataFrame if the type of `data` is DataFrame or str, or geopandas.GeoDataFrame if it is geopandas.GeoDataFrame.

    >>> import pandas as pd
    >>> df = pd.DataFrame({'City': ['Rabat', 'Lyon', 'Cleveland'],
    ...                    'Country': ['Morocco', 'France', 'United States of America']})
    >>> add_country_code(data=df, ctry_col='Country')
            City                   Country ISO2 ISO3
    0      Rabat                   Morocco   MA  MAR
    1       Lyon                    France   FR  FRA
    2  Cleveland  United States of America   US  USA
    """
    df = read_data(data, {ctry_col})
    df['ISO2'] = None
    df['ISO3'] = None

    df['ISO2'] = coco.convert(names=list(df[ctry_col]), to='ISO2')
    df['ISO3'] = coco.convert(names=list(df[ctry_col]), to='ISO3')

    return df
Example no. 9
    async def gni_percap(self, ctx, country: str, year: int):
        await ctx.defer(hidden=True)
        arg = country
        arg2 = str(year)
        try:
            country1 = coco.convert(names=arg, to="iso2")
            country2 = []
            country2.append(country1)

            # set up the indicator I want (just build up the dict if you want more than one)
            indicators = {"NY.GNP.PCAP.CD": "GNI per Capita"}

            # grab the indicators above for the countries above and load into a data frame
            df = wbdata.get_dataframe(
                indicators, country=country2, convert_date=False
            ).to_dict()["GNI per Capita"][arg2]

            if str(df) == "nan":
                embed = discord.Embed(
                    title="Sorry",
                    description="**We couldn't find data for that year**",
                    color=0xFF5733,
                )

                embed.set_thumbnail(url=url)
                await ctx.send(embed=embed)

            else:

                embed = discord.Embed(
                    title="GNI per capita of {}".format(arg),
                    description=f"The gni per capita of {arg} in {arg2} was/is $`{str(df)}`",
                    color=0xFF5733,
                )

                result3 = coco.convert(names=arg, to="ISO2")

                embed.set_thumbnail(
                    url=f"https://flagcdn.com/w80/{result3.lower()}.jpg"
                )

                embed.set_footer(text="Information requested by: {}".format(ctx.author))

                await ctx.send(embed=embed)

        except Exception:
            embed = discord.Embed(
                title="Sorry",
                description="** We could not find data for that year**",
                color=0xFF5733,
            )

            embed.set_thumbnail(url=url)

            await ctx.send(embed=embed)
Example no. 10
def prepare_daily_report(mes, dia):

    current_date = (datetime.today() - timedelta(days=1)).strftime('%m-%d-%Y')

    #df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/' + current_date + '.csv')
    df_bar = pd.read_csv('data_final.csv')
    #df_country = df.groupby(['Country_Region']).sum().reset_index()
    #df_country.replace('US', 'United States', inplace=True)
    df_dia = df_bar[df_bar['dia_nomi'] == dia]
    df_mes = df_dia[df_dia['mes'] == mes]

    # delayed + on-time (atraso + puntual): count every flight as 1
    df_mes['atraso_total'] = df_mes['atraso'].replace([0], [1])
    
    df_country = df_mes.groupby(['dest_pais']).sum().reset_index()

    df_country = df_country[df_country['dest_pais'] != 'CL']

    
    #code_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
    code_df = pd.read_csv('code.csv')
    df_country['pais'] = coco.convert(names=df_country.dest_pais.tolist(), to='name_short', not_found=None)
    df_country_code = df_country.merge(code_df, left_on='pais', right_on='COUNTRY', how='left')
    
    return df_country_code
Example no. 11
    async def states(self, ctx, *, country):
        data = await country_filter(country, ctx)
        if data is None:
            return

        country = data["name"]
        country1 = CountryInfo(country)

        result = country1.provinces()

        for i, x in enumerate(result):
            result[i] = "`" + x + "`"

        result1 = " |".join(result)

        result2 = re.sub(r"(?<=[|])(?=[^\s])", r" ", result1)

        embed = discord.Embed(
            title="States of " + country,
            description="**{result2}**".format(result2=result2),
            color=0xFF5733,
        )

        result4 = coco.convert(names=country, to="ISO2")
        embed.set_thumbnail(
            url=f"https://flagcdn.com/w80/{result4.lower()}.jpg")

        embed.set_footer(
            text="Requested by: {name}".format(name=ctx.author),
            icon_url=ctx.author.avatar_url,
        )

        await ctx.send(embed=embed)
Example no. 12
def iso3_to_country(iso3):
    """ Take user input and convert it to the short version of the country name """

    if iso3 == 'Global':
        return 'Global'
    country = coco.convert(names=iso3, to='name_short')
    return country
Example no. 13
def get_data(cur, conn, start_date, end_date):
    countries = []
    cur.execute('SELECT country FROM GDP ORDER BY GDP DESC')
    country_names = cur.fetchall()
    for row in range(0, 99, 10):
        countries.append(country_names[row][0])
    codes = coco.convert(names=countries, to='ISO3')
    request_url = f'https://covidtrackerapi.bsg.ox.ac.uk/api/v2/stringency/date-range/{start_date}/{end_date}'
    request = requests.get(request_url)
    jsons = json.loads(request.text)
    data = jsons.get('data')
    data_list = []
    for i, code in enumerate(codes):
        dateCases = []
        for date in data:
            all_cases = data[date].get(code)
            if all_cases is None:
                continue
            cases = all_cases.get('confirmed')
            dateCases.append((date, countries[i], cases))
        data_list.append(dateCases)
    return data_list
Example no. 14
    def __init__(self, db, predict_range, country_name, state_name=None):
        pandemic_data = db.get_epidemic_data_in(
            [country_name], ['deaths', 'confirmed', 'recovered'],
            "COVID19",
            since_epidemy_start=False,
            state_name=state_name,
            from_date='2020-03-07')
        country_code = coco.convert(country_name, to='ISO2').lower()

        self.timeline = pandemic_data['dates']
        self.extended_timeline = self.timeline

        self.predict_range = predict_range

        self.tweets, self.positive_tweets, self.negative_tweets = db.get_tweets_per_day_in(
            country_code, state_name, self.timeline[0], self.timeline[-1])

        self.users_in_country = db.get_number_of_users_in(
            country_code, state_name)

        self.days = len(self.tweets)

        self.confirmed = pandemic_data[country_name]['confirmed']
        self.recovered = pandemic_data[country_name]['recovered']
        self.deaths = pandemic_data[country_name]['deaths']

        self.active = [
            pandemic_data[country_name]['confirmed'][i] - self.recovered[i] -
            self.deaths[i] for i in range(len(self.deaths))
        ]

        self.N = pandemic_data[country_name]['population']

        self.beta = 1
        self.gamma = 1
        self.delta = 1

        self.t_infl = 1 / self.users_in_country
        self.t_outdt = 5  # tweet outdating rate
        self.t_s = self.users_in_country / self.N
        self.t_e = self.users_in_country / self.N
        self.t_i = self.users_in_country / self.N

        self.tweets_line = self.create_tweets_line()

        self.I0 = 1
        self.R0 = self.beta / self.gamma
        self.S0 = self.N - self.I0 - self.R0
        self.E0 = self.N - (self.S0 + self.I0 + self.R0)
        self.Y0 = self.S0, self.E0, self.I0, self.R0
        self.T0 = self.tweets[0]

        self.params = [("beta", self.beta), ("gamma", self.gamma),
                       ("delta", self.delta), ("i0", self.I0), ("t0", self.T0),
                       ("t_infl", self.t_infl), ("t_outdt", self.t_outdt),
                       ("t_s", self.t_s), ("t_e", self.t_e), ("t_i", self.t_i)]
        self.frozen_params = ["beta", "gamma", "delta", "i0", "t0", "t_outdt"]

        self.fit_data = self.active
        self.fit_fun = self.fit_I
Example no. 15
    def _read_pop(self, populationdata):
        # now actually read the population data
        df = pd.read_csv(populationdata)
        country_properties1 = {}
        for index, row in df.iterrows():
            try:
                ccc = coco.convert(names=row['Name'], to="name_short")
                country_properties1[ccc] = float(row['Population'])
            except Exception:
                print(row['Name'])
                print(row['Population'])

        self.country_pop = {}
        print('second')
        for country, i in self.nodes.items():
            if self.country_pop.get(country, -1) > 0:
                print('duplicate:', country)
            else:
                pop = country_properties1.get(country, 1)
                self.country_pop[country] = pop
        print('These countries have no pop data:')
        for k, v in self.country_pop.items():
            if v <= 0:
                print(k)
        self.population = np.ones(len(self.nodes))
        for cc, i in self.nodes.items():
            self.population[i] = self.country_pop.get(cc, 1.0)
Example no. 16
def is_country_present(dataframe, country):
    """ Checks if user input country is a valid country in the dataset """
    iso3 = coco.convert(names=country, to='ISO3')
    # process some error if coco can't convert to a country

    # check against the column's values: `in` on a pandas Series tests the index, not the values
    in_dataset = iso3 in dataframe['iso3'].values
    return in_dataset
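The placeholder comment above leaves the "not found" case unhandled. A minimal sketch of one way to treat it, assuming country_converter's default behaviour of returning the string 'not found' for names it cannot resolve (same dataframe and column as above):

    import country_converter as coco

    def is_country_present(dataframe, country):
        """ Checks if user input country is a valid country in the dataset """
        iso3 = coco.convert(names=country, to='ISO3')
        if iso3 == 'not found':  # coco could not resolve the user input
            return False
        return iso3 in dataframe['iso3'].values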
Example no. 17
def country(code):
    code = phonenumbers.region_code_for_country_code(code)
    print(code)
    if code == "ZZ":
        return jsonify({"message": "Resource not found"}), 404
    return jsonify(
        {"country": country_converter.convert(code, to="short_name")})
Example no. 18
def get_emoji_country(territory):
    """ Gets an emoji for the country"""

    try:
        return flag.flag(convert(names=[territory], to="ISO2"))
    except ValueError:
        return ""
Example no. 19
def get_ip_info(dom):

    try:
        ip_address = socket.gethostbyname(dom)
        who_is = IPWhois(ip_address).lookup_rdap()
        # pprint.pprint(who_is)
        country_code = who_is['asn_country_code']
        registration_date = who_is['asn_date']
        date_now = date.today()
        rdate = date(*map(int, registration_date.split('-')))
        difference = date_now - rdate

        if difference > datetime.timedelta(days=365):
            dom_age_gt_1year = 1
        else:
            dom_age_gt_1year = 0

        if country_code == '  ':
            country_name = 'Unknown'
        else:
            # convert the 2-char country code into a short name, e.g. US -> United States
            country_name = coco.convert(country_code, to='name_short')
            if 'Not' in country_name:
                country_name = 'Unknown'
            else:
                country_name = country_name.replace(" ", "_")

        return country_name, dom_age_gt_1year

    except Exception:
        return 'Unknown', 0
Example no. 20
def list_to_iso3(OBOR_country_list: list):

    OBOR_country_iso3 = country_converter.convert(names=OBOR_country_list, to='ISO3')
    if len(OBOR_country_iso3) == len(OBOR_country_list):
        return OBOR_country_iso3
    else:
        return None
Example no. 21
def get_cnt(cnts):
    """extract ISO from country field, replace unknown ISOs, make sure no duplicating bad names"""

    repl = {
        '2014 Revised Strategic Response Plan Sudan': 'Sudan',
        '2016 Humanitarian Response Plan': 'Iraq',
        '2017 Humanitarian Needs Overview Congo': 'Congo',
        'Emergency Humanitarian Response Plan REVISION 2008': 'Kenya',
        'Philippine: Typhoon Haiyan (Yolanda) Strategic Response Plan 2014':
        'Philippines',
        'Strategic Response Plan 2014 Occupied Palestian Territory':
        'Palestine',
        'Sudanese Red Crescent Society Emergency appeal 2014': 'Sudan',
        'REGIONAL REFUGEE AND MIGRANT RESPONSE PLAN FOR EUROPE.pdf': 'Turkey',
        'REGIONAL REFUGEE AND MIGRANT RESPONSE PLAN FOR EUROPE': 'Turkey',
        'Regional Refugee &amp; Resilience Plan 2015-2016': 'Syria',
        'JRP For Rohingya Humanitarian Crisis.pdf': 'Bangladesh',
        'Regional Refugee and Resilience Plan 2016-2017': 'Syria',
        'Regional Refugee and Resilience Plan 2017-2018': 'Syria'
    }

    cnts = [repl[v] if v in repl else v for v in cnts]

    assert (len([v for v in cnts
                 if v in repl]) == len(set([v for v in cnts if v in repl])))
    assert ('not found' not in cnts)

    return coco.convert(names=cnts, to='ISO3')
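A hedged usage sketch of get_cnt, assuming country_converter's usual list-in, list-out behaviour; the first input string comes from the replacement table above:

    print(get_cnt(['2016 Humanitarian Response Plan', 'Kenya']))
    # expected output: ['IRQ', 'KEN']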
Example no. 22
    def ensure_country_name(self, country):
        """
        Ensure that the country name is correct.
        If not, the correct country name will be found.

        Args:
            country (str): country name

        Returns:
            str: country name
        """
        df = self._ensure_dataframe(self._cleaned_df,
                                    name="the cleaned dataset",
                                    columns=[self.COUNTRY])
        selectable_set = set(df[self.COUNTRY].unique())
        # return country name as-is if selectable
        if country in selectable_set:
            return country
        # Convert country name
        converted = coco.convert(country, to="name_short", not_found=None)
        # Additional abbr
        abbr_dict = {
            "Congo Republic": "Republic of the Congo",
            "DR Congo": "Democratic Republic of the Congo",
            "UK": "United Kingdom",
            "Vatican": "Holy See",
        }
        name = abbr_dict.get(converted, converted)
        # Return the name if registered in the dataset
        if name in selectable_set:
            return name
        raise SubsetNotFoundError(country=country, country_alias=name)
Example no. 23
def parse_hopkins(file_name):

        with open(file_name) as f:
            hopkins_data = json.load(f)

        #contains other (numbered) dictionaries that contain the info
        hopkins_data_dict = hopkins_data['data']

        #data lists
        countries_list = []
        confirmed_list = []
        deaths_list = []
        recovered_list = []

        #add info to the corresponding lists
        for i in range(0, len(hopkins_data_dict)):
            countries_list.append(hopkins_data_dict[str(i)]['location'])
            confirmed_list.append(hopkins_data_dict[str(i)]['confirmed'])
            deaths_list.append(hopkins_data_dict[str(i)]['deaths'])
            recovered_list.append(hopkins_data_dict[str(i)]['recovered'])

        ISO_3_list = coco.convert(names=countries_list, to='ISO3')

        compiled_data = pd.DataFrame(
            {
                "location_code": ISO_3_list,
                "confirmed_cases": confirmed_list,
                "deaths": deaths_list,
                "recovered": recovered_list,
                "dt": file_name[37:-5]
            })

        print(compiled_data)
        #substring includes the date and time of the file
        compiled_data.to_json('./Scraper/Data/parsed_data_' + file_name[29:])
Example no. 24
    def run(self, dispatcher, tracker, domain):
            from newsapi import NewsApiClient
            
            # Init
            newsapi = NewsApiClient(api_key='f908755783e34e738776e64eeacfbd17')

            print(tracker.get_slot('country'))

            country = tracker.get_slot('country')
            
            country_iso2 = coco.convert(names=country, to='ISO2', not_found=None)

            print(country_iso2)

            country_iso2_low = country_iso2.lower()

            # /v2/top-headlines
            top_headlines = newsapi.get_top_headlines(#q=country,
                                                    #sources='bbc-news,the-verge',
                                                    country=country_iso2_low)

            news_json = json.dumps(top_headlines)

            list_articles = json.loads(news_json)

            for articles in list_articles['articles']:
                print('Author: ' + str(articles['author']))
                print('Source: ' + str(articles['source']['name']))
                print('Title: ' + str(articles['title']))
                print('URL: ' + str(articles['url']) + "\n")
                print("############################################################")
                dispatcher.utter_message('**Source:** ' + str(articles['source']['name']) + '\n **Title:** ' + str(articles['title']) + '\n **URL:** ' + str(articles['url']) + '\n')
Example no. 25
 def bw_get_activity_info_manually(cls, input_act_str, db_name, input_act_amount):
     # Extract the activity name
     apostrophes = [(m.start(0), m.end(0)) for m in re.finditer("'", input_act_str)]
     if len(apostrophes) == 1:
         ap_start = 0
         ap_end = apostrophes[0][0]
     else:
         ap_start = apostrophes[0][1]
         ap_end = apostrophes[1][0]
     input_act_name = input_act_str[ap_start:ap_end]
     input_act_unit_loc = input_act_str[input_act_str.find("("): input_act_str.find(")") + 1]
     input_act_unit_loc_split = [
         re.sub('[^-A-Za-z0-9-€-]', ' ', el).rstrip().lstrip() for el in input_act_unit_loc.split(',')
     ]
     input_act_unit = input_act_unit_loc_split[0]
     input_act_location = input_act_unit_loc_split[1]
     # Add comment when activity cannot be found
     input_act_values_dict = {}
     if 'exiobase' in db_name.lower() and "Manufacture of " in input_act_name:
         input_act_name = input_act_name[15:].capitalize()
     input_act_values_dict['name'] = input_act_name
     input_act_values_dict['unit'] = input_act_unit
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         location_iso2 = coco.convert(names=input_act_location, to='ISO2')
     if location_iso2 == "not found":
         location_iso2 = input_act_location
     input_act_values_dict['location'] = location_iso2
     input_act_values_dict['amount'] = input_act_amount
     input_act_values_dict['database'] = db_name
     input_act_values_dict['type'] = ACTIVITY_TYPE_DICT['process']  # TODO remove hardcoding
     input_act_values_dict['comment'] = 'TODO could not find this activity'
     return input_act_values_dict
Example no. 26
    def pull_base(self):
        """
            get the base table for incoming/outgoing movements, and then use it to generate the other 2 tables
        """
        LOC = '../d0cz/unhcr_popstats_export_persons_of_concern_2018_06_20.csv'
        ref = pd.read_csv(LOC, skiprows=3)
        ref.columns = [
            'ref_' + v.lower().strip().replace(' ', '_')
            for v in ref.columns.values
        ]

        # only rename weird cols, keep the rest. total:
        """
        ref_year
        ref_destination
        ref_origin
        ref_refugees
        ref_asylum-seekers
        ref_returned_refugees
        ref_idps
        ref_returned_idps
        ref_stateless_persons
        ref_others_of_concern
        ref_total_population
        """
        ref.rename(
            {
                'ref_country_/_territory_of_asylum/residence':
                'ref_destination',
                'ref_refugees_(incl._refugee-like_situations)': 'ref_refugees',
                'ref_asylum-seekers_(pending_cases)': 'ref_asylum-seekers',
                'ref_internally_displaced_persons_(idps)': 'ref_idps'
            },
            axis=1,
            inplace=True)

        # add in ISOs, uid
        trans = {v: coco.convert(names=v, to='ISO3') \
                 for v in list(set(list(ref.ref_destination.values) + list(ref.ref_origin.values)))}
        trans['Serbia and Kosovo (S/RES/1244 (1999))'] = 'SRB'
        trans['Tibetan'] = 'CHN'

        ref['ref_dest_iso3'] = ref.apply(lambda x: trans[x['ref_destination']],
                                         axis=1)
        ref['ref_org_iso3'] = ref.apply(lambda x: trans[x['ref_origin']],
                                        axis=1)
        assert (list not in [type(v) for v in trans.values()])

        # clean cols
        tc = [
            'ref_refugees', 'ref_asylum-seekers', 'ref_returned_refugees',
            'ref_idps', 'ref_returned_idps', 'ref_stateless_persons',
            'ref_others_of_concern', 'ref_total_population'
        ]

        for c in tc:
            ref[c] = ref.apply(lambda x: self.clean_col(x[c]), axis=1)

        return ref
Example no. 27
def addContinentAndRegionColumn(data_frame):
    country_names, set_indices = np.unique(data_frame['Country'],
                                           return_inverse=True)

    name_count = len(country_names)

    # the padded strings set numpy's fixed-width string dtype so the continent
    # and region names assigned below are not truncated
    continents = np.full(name_count, '       ')
    regions = np.full(name_count, '                         ')

    for i in range(name_count):
        continents[i] = coco.convert(names=country_names[i], to='continent')
        regions[i] = coco.convert(names=country_names[i], to='UNregion')

    data_frame.insert(5, 'Continent', continents[set_indices])
    data_frame.insert(5, 'Region', regions[set_indices])

    return data_frame
Example no. 28
def call_conan(country, to="isocode"):
    coco_logger = coco.logging.getLogger()
    coco_logger.setLevel(logging.CRITICAL)

    # https://stackoverflow.com/a/57986495/10295948
    warnings.filterwarnings("ignore")

    return coco.convert(country, to=to, not_found=None)
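A hedged usage sketch of the wrapper above: with not_found=None, names that country_converter cannot match come back as None instead of the string 'not found' (the inputs below are only illustrative):

    print(call_conan("Germany", to="ISO3"))   # expected: 'DEU'
    print(call_conan("Atlantis", to="ISO3"))  # expected: None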
Example no. 29
    def _name_combine(self, flowdata, casedata, populationdata):
        # First, align the country names: load the country fields from the flight-flow data,
        # the population data and the case data, and convert them all to standard country names.
        # flow data
        first_date = datetime.datetime(2019, 12, 1, 0, 0)
        day_len = (datetime.datetime.strptime('2020-12-31', '%Y-%m-%d') -
                   first_date).days
        fijt = []
        countries = set([])
        for day in range(day_len):
            dd = (first_date +
                  datetime.timedelta(days=day)).strftime("%Y-%m-%d")
            flushPrint(dd)
            filename = flowdata + dd + '.csv'
            df = pd.read_csv(filename)
            countries1 = set(df['Unnamed: 0'])
            countries2 = set(list(df.columns)[1:])
            countries = countries | countries1 | countries2
            flushPrint(len(countries))
        countries_temp = list(coco.convert(names=countries, to="name_short"))
        countries1 = set(countries_temp)

        self.name_map = {}
        for i, cc in enumerate(countries):
            vv = self.name_map.get(cc, '')
            if len(vv) == 0:
                self.name_map[cc] = countries_temp[i]
        print(len(countries), len(countries1))

        # case data
        df = pd.read_csv(casedata)
        countries = set(df['country'][1:])
        countries2 = set(coco.convert(names=countries, to="name_short"))
        print(len(countries), len(countries2))

        # population data
        df = pd.read_csv(populationdata)
        countries = set(df['Name'][1:])
        countries3 = set(coco.convert(names=countries, to="name_short"))
        print(len(countries), len(countries3))

        countries = countries1 | countries2 | countries3
        self.nodes = {}
        for country in countries:
            if country != 'not found':
                idx = self.nodes.get(country, len(self.nodes))
                self.nodes[country] = idx
Example no. 30
def findLocation(locstr):
    """
    Converts a string of location into a Location object

    :param locstr: location, in any format (str)
    :return: Location object with country, state and city separated (Location object)
    """

    # Uses Location IQ API call to convert string to standardised format
    url = "https://us1.locationiq.com/v1/search.php"
    # BUG: If job is in CA, LocationIQ reads as Canada. Can add countrycodes, but am right now just appending USA to the end is working fine.
    data = {
        'key': 'f44315769abf5d',
        'q': str(locstr),
        'format': 'json',
        'normalizecity': '1',
        'addressdetails': '1',
        'statecode': '1'
    }

    # Tries 5 times in case code encounters a rate limit
    for i in range(5):
        response = json.loads(requests.get(url, params=data).text)

        # Tests for whether request limit or other error has happened with LocationIQ. Tries again after sleeping to overcome rate limits
        if type(response) == dict:
            if "error" in response:
                if response["error"] == "Rate Limited Second":
                    print(
                        "LocationIQ second rate limit exceeded. Trying again in 1 second..."
                    )
                    time.sleep(1)
                    continue
                elif response["error"] == "Rate Limited Minute":
                    print(
                        "LocationIQ minute rate limit exceeded. Trying again in 1 minute..."
                    )
                    time.sleep(60)
                    continue
                elif response["error"] == "Rate Limited Day":
                    print(
                        "LocationIQ day rate limit exceeded. Try again in a day, and find a way to reduce requests in future."
                    )
                    # TODO: change exit calls to Error calls and handle as needed.
                    exit(405)
                else:
                    print("LocationIQ returned unknown error %s." %
                          str(response["error"]))
                    exit(405)
        break

    # Isolates important information to create Location object
    address = response[0]["address"]
    city = address["city"]
    state = address["state_code"]
    country = coco.convert(names=address["country_code"], to='ISOnumeric')

    return Location(country, state.upper(), city)
Example no. 31
def pull():
    LOC = '../d0cz/180417 Homepage dashboard data.xlsx'
    hist = pd.read_excel(LOC, sheet_name='DATA FUNDING & BENEFICIARIES')

    # colz
    hist.columns = hist.columns.str.lower()

    hist = hist.rename(
        {'country': 'country',
         'year': 'year',
         'uploaded': 'uploaded',
         'quarter': 'quarter',
         'status': 'status',
         'url': 'url',
         'region': 'region',
         'type of crisis': 'type',
         'lead': 'lead',
         'co-chair / co-lead': 'co_lead',
         'funding received': 'funding_received',
         'funding required': 'funding_required',
         'funding coverage': 'funding_coverage',
         'count': 'count',
         '# of partners': 'num_partner',
         '# of people reached in total': 'num_reached_tot',
         '# of people targeted in total': 'num_targeted_tot',
         'coverage against target': 'cov_against_target',
         '# of people reached with nfi': 'num_reached_nfi',
         '# of people targeted with nfi': 'num_target_nfi',
         '# of people reached with shelter': 'num_reached_shelt',
         '# of people targeted with shelter': 'num_targ_shelt',
         'data from': 'data_from',
         'source/comments': 'source_comment'}, axis=1
    )
    hist.columns = ['dash_' + v for v in hist.columns]

    # drop colz
    hist.drop('dash_unnamed: 20', axis=1, inplace=True)
    hist.drop('dash_coverage against target.1', axis=1, inplace=True)
    hist.drop('dash_unnamed: 24', axis=1, inplace=True)
    hist.drop('dash_coverage against target.2', axis=1, inplace=True)

    # drop rowz
    hist = hist[hist['dash_country'] != 'Pacific Region']

    # new colz (new boyz)
    hist['dash_iso3'] = hist['dash_country'].apply(lambda x: coco.convert(names=x, to='ISO3'))
    hist['dash_uid'] = hist['dash_iso3'] + hist['dash_year'].map(str)

    # rm dupz
    hist = hist.drop_duplicates(subset='dash_uid', keep='last')

    return hist
Example no. 32
def get_cnt(cnts):
    """extract ISO from country field, replace unknown ISOs, make sure no duplicating bad names"""

    repl = {'2014 Revised Strategic Response Plan Sudan': 'Sudan',
            '2016 Humanitarian Response Plan': 'Iraq',
            '2017 Humanitarian Needs Overview Congo': 'Congo',
            'Emergency Humanitarian Response Plan REVISION 2008': 'Kenya',
            'Philippine: Typhoon Haiyan (Yolanda) Strategic Response Plan 2014': 'Philippines',
            'Strategic Response Plan 2014 Occupied Palestian Territory': 'Palestine',
            'Sudanese Red Crescent Society Emergency appeal 2014': 'Sudan',
            'REGIONAL REFUGEE AND MIGRANT RESPONSE PLAN FOR EUROPE.pdf': 'Turkey',
            'REGIONAL REFUGEE AND MIGRANT RESPONSE PLAN FOR EUROPE': 'Turkey',
            'Regional Refugee &amp; Resilience Plan 2015-2016': 'Syria',
            'JRP For Rohingya Humanitarian Crisis.pdf': 'Bangladesh',
            'Regional Refugee and Resilience Plan 2016-2017': 'Syria',
            'Regional Refugee and Resilience Plan 2017-2018': 'Syria'
            }

    cnts = [repl[v] if v in repl else v for v in cnts]

    assert (len([v for v in cnts if v in repl]) == len(set([v for v in cnts if v in repl])))
    assert ('not found' not in cnts)

    return coco.convert(names=cnts, to='ISO3')