Exemple #1
0
def parse_multiple_countries(data_list, nominal_start=None, nominal_end=None):
    """
    Split a flat list of indicator rows into one Country per country id.

    Rows are grouped on item['country']['id']. Countries are returned in the
    order in which their id first appears in data_list, and the rows of each
    country keep their relative order. data_list itself is not modified.

    @param data_list: list of rows, each exposing item['country']['id']
    @param nominal_start: what start year did the query ask for (not necessarily get)
    @param nominal_end: what end year did the query ask for (not necessarily get)
    @return: list of countries
    """
    # When it's a list of indicator the World bank returns ISO2 codes labeled 'id'
    rows_by_iso2 = {}
    # Explicit first-seen order, so the result does not depend on dict ordering.
    iso2_order = []
    # Single pass instead of re-filtering the remaining list for every country.
    # The previous filter() result is a one-shot iterator on Python 3: once
    # parse_single_country had consumed it, the "remove parsed data" step
    # removed nothing and the loop never terminated.
    for item in data_list:
        iso2 = item['country']['id']
        if iso2 not in rows_by_iso2:
            rows_by_iso2[iso2] = []
            iso2_order.append(iso2)
        rows_by_iso2[iso2].append(item)

    countries = []
    for iso2 in iso2_order:
        # we now basically don't know the country's "real" (iso3) id
        country = Country("")
        country.code_iso2 = iso2
        indicator = parse_single_country(rows_by_iso2[iso2], nominal_start,
                                         nominal_end)
        country.set_indicator(indicator)
        countries.append(country)
    return countries
Exemple #2
0
def parse_multiple_countries_alone(data_list):
    """
    Parse a query for countries without indicators.

    Sorts data_list in place by each entry's 'id', then builds one Country
    per entry from its 'id', 'iso2Code' and 'name' fields.

    @param data_list: list of country entries (sorted in place by 'id')
    @return: list of countries, in sorted order
    """
    data_list.sort(key=lambda entry: entry['id'])

    def build(entry):
        # Read both codes before constructing, as the Country needs the id.
        code, iso2 = entry['id'], entry['iso2Code']
        built = Country(code)
        built.code_iso2 = iso2
        built.name = entry['name']
        return built

    return [build(entry) for entry in data_list]
Exemple #3
0
def parse_multiple_countries_alone(data_list):
    """
    Turn a country-only query result into Country objects.

    data_list is sorted in place on the 'id' field first; each record then
    yields a Country built from 'id', with code_iso2 taken from 'iso2Code'
    and name taken from 'name'.

    @param data_list: list of country records (sorted in place by 'id')
    @return: list of countries
    """
    data_list.sort(key=lambda record: record['id'])
    parsed = []
    for record in data_list:
        code, iso2 = record['id'], record['iso2Code']
        entry = Country(code)
        entry.code_iso2 = iso2
        entry.name = record['name']
        parsed.append(entry)
    return parsed
Exemple #4
0
def query_multiple_data(country_codes=('all',),
                        indicator_codes=(),
                        start_date=2010,
                        end_date=2011):
    """
    Build country objects from the module-level ``_data`` table.

    For every requested country the entry ``_data[CODE]`` (code upper-cased)
    is read, and for every requested indicator the values for the wanted
    years are extracted, with missing values and zeros dropped.

    @param country_codes: iterable of country codes to fetch e.g. ['usa','bra']
    @param indicator_codes: iterable of indicator codes to fetch
    @param start_date: first year to include
    @param end_date: year at which to stop
        NOTE(review): the years come from np.arange(start_date, end_date), so
        end_date itself is NOT included - confirm this is intended.

    @return: a list of country objects
    """
    # NOTE(review): the default code 'all' is looked up as _data['ALL'] like
    # any other code - confirm such an entry exists.
    countries = []
    for country_code in country_codes:
        country_data = _data[str(country_code.upper())]
        country = Country(country_code)
        for indicator_code in indicator_codes:
            indicator_data_full = country_data[indicator_code]
            wanted_years = np.arange(start_date, end_date)
            # .ix was removed in pandas 1.0. reindex() selects by label and
            # yields NaN for years that are absent, which dropna() discards.
            # Assumes the data is indexed by integer year - TODO confirm.
            indicator_data = indicator_data_full.reindex(wanted_years).dropna()

            #TODO: Figure out how to treat zeros in RCA data (for now I am treating them as missing values).
            indicator_data = indicator_data[indicator_data != 0]

            dates = list(indicator_data.index)
            values = list(indicator_data.values)
            indicator = Indicator(code=indicator_code,
                                  dates=dates,
                                  values=values,
                                  nominal_start=start_date,
                                  nominal_end=end_date)
            country.set_indicator(indicator)
        countries.append(country)
    return countries
Exemple #5
0
def parse_multiple_countries(data_list, nominal_start=None, nominal_end=None):
    """
    Split a flat list of indicator rows into one Country per country id.

    Rows are grouped on item['country']['id']. Countries are returned in the
    order in which their id first appears in data_list, and the rows of each
    country keep their relative order. data_list itself is not modified.

    @param data_list: list of rows, each exposing item['country']['id']
    @param nominal_start: what start year did the query ask for (not necessarily get)
    @param nominal_end: what end year did the query ask for (not necessarily get)
    @return: list of countries
    """
    # When it's a list of indicator the World bank returns ISO2 codes labeled 'id'
    rows_by_iso2 = {}
    # Explicit first-seen order, so the result does not depend on dict ordering.
    iso2_order = []
    # Single pass instead of re-filtering the remaining list for every country.
    # The previous filter() result is a one-shot iterator on Python 3: once
    # parse_single_country had consumed it, the "remove parsed data" step
    # removed nothing and the loop never terminated.
    for item in data_list:
        iso2 = item['country']['id']
        if iso2 not in rows_by_iso2:
            rows_by_iso2[iso2] = []
            iso2_order.append(iso2)
        rows_by_iso2[iso2].append(item)

    countries = []
    for iso2 in iso2_order:
        # we now basically don't know the country's "real" (iso3) id
        country = Country("")
        country.code_iso2 = iso2
        indicator = parse_single_country(rows_by_iso2[iso2], nominal_start, nominal_end)
        country.set_indicator(indicator)
        countries.append(country)
    return countries
Exemple #6
0
 def retreive(self, arg):
     """
     Checks the cache for data defined by arg and returns it if it's there.

     The cached countries are read from self.db.countries and deserialized
     with Country.from_json. A hit requires every requested country, every
     requested indicator for each of those countries, and the requested
     year interval to lie within each indicator's nominal interval.
     Indicators that were not asked for are dropped from the returned
     (deserialized) country objects.

     @param arg: same as for Extractor.grab(); the keys read here are
         "country_codes", "indicator_codes", "start_date" and "end_date"
     @return: data specified by the arg or None if it's a cache miss
     """
     # deserialize what's in the DB
     countries_raw = list(self.db.countries.find())
     cached_countries = [Country.from_json(country) for country in countries_raw]
     # let's pick what we need from it
     target_countries = []
     contains_indicators, contains_years = True, True
     wanted_countries = set(arg["country_codes"])
     for country in cached_countries:
         if country.code not in wanted_countries:
             continue
         # add the countries we need
         target_countries.append(country)
         wanted_countries.remove(country.code)
         # now for that country, let's go see if we have the indicators
         wanted_indicators = set(arg["indicator_codes"])
         # Iterate over a snapshot: entries are popped from
         # country.indicators inside the loop, and mutating a dict while
         # iterating its live view raises RuntimeError on Python 3.
         for indicator in list(country.indicators.values()):
             if indicator.code not in wanted_indicators:
                 # we don't even want that one
                 country.indicators.pop(indicator.code)
                 continue
             wanted_indicators.remove(indicator.code)
             # cool, it's here; now let's check for its years
             begin_date = indicator.nominal_start
             end_date = indicator.nominal_end
             interval_satisfied = (arg["start_date"] >= begin_date
                                   and arg["end_date"] <= end_date)
             if interval_satisfied:
                 # Restrict the data to the years that were asked for.
                 # NOTE(review): the result of slice() is only bound to the
                 # loop variable and never stored back on the country; this
                 # has an effect only if slice() works in place - confirm.
                 indicator = indicator.slice(arg["start_date"], arg["end_date"])
             else:
                 contains_years = False
         if wanted_indicators:
             # at least one requested indicator is missing for this country
             contains_indicators = False
     # any code still left in wanted_countries was not found in the cache
     contains_countries = not wanted_countries
     # all 3 conditions must be true, no data can be missing
     if contains_countries and contains_indicators and contains_years:
         return target_countries
     # we have a cache miss
     return None
Exemple #7
0
 def test_add_indicators_to_countries(self):
     """
     Two base countries (iso2 "c1" and "c2") are passed to
     parser.add_indicators_to_countries together with two batches of
     anonymous countries, each carrying one indicator ("ind1" in the first
     batch, "ind2" in the second). Afterwards there must still be two
     countries and the first one must expose "ind1" with two values.
     """
     def make_country(code, iso2, indicator=None):
         # Build a Country, tag it with its iso2 code and optionally
         # attach a single indicator.
         built = Country(code)
         built.code_iso2 = iso2
         if indicator is not None:
             built.set_indicator(indicator)
         return built

     countries = [
         make_country("country1", "c1"),
         make_country("country2", "c2"),
     ]
     ind1_batch = [
         make_country("", "c1", Indicator("ind1", [1, 2], [10, 20])),
         make_country("", "c2", Indicator("ind1", [1, 2], [230, 240])),
     ]
     ind2_batch = [
         make_country("", "c1", Indicator("ind2", [1, 2], [20, 30])),
         make_country("", "c2", Indicator("ind2", [1, 2], [330, 340])),
     ]
     country_indicators = [ind1_batch, ind2_batch]

     parser.add_indicators_to_countries(countries, country_indicators)

     self.assertEqual(len(countries), 2)
     self.assertEqual(len(countries[0].get_indicator("ind1").values), 2)