def parse_multiple_countries(data_list, nominal_start=None, nominal_end=None):
    """
    Parse indicator records covering several countries into Country objects.

    Records are grouped by the ISO2 code found under item['country']['id'].
    Each group is passed to parse_single_country and the indicator it returns
    is attached to a freshly created Country. Countries are returned in the
    order in which their code first appears in data_list; data_list itself is
    left untouched.

    @param data_list: records, each exposing item['country']['id']
    @param nominal_start: what start year did the query ask for (not necessarily get)
    @param nominal_end: what end year did the query ask for (not necessarily get)
    @return: list of countries
    """
    # When it's a list of indicator the World bank returns ISO2 codes labeled 'id'
    records_by_code = {}
    codes_in_order = []
    # One pass over the input: no repeated re-filtering of the remaining
    # records, and no reliance on filter() returning a reusable list.
    for item in data_list:
        code_iso2 = item['country']['id']
        if code_iso2 not in records_by_code:
            records_by_code[code_iso2] = []
            codes_in_order.append(code_iso2)
        records_by_code[code_iso2].append(item)

    countries = []
    for code_iso2 in codes_in_order:
        # we now basically don't know the country's "real" (iso3) id
        country = Country("")
        country.code_iso2 = code_iso2
        indicator = parse_single_country(records_by_code[code_iso2],
                                         nominal_start, nominal_end)
        country.set_indicator(indicator)
        countries.append(country)
    return countries
def parse_multiple_countries_alone(data_list):
    """
    Build Country objects from a country-only query result (no indicators).

    data_list is sorted in place by each entry's 'id' before conversion, so
    the caller's list is reordered as a side effect.

    @param data_list: list of entries exposing 'id', 'iso2Code' and 'name'
    @return: list of countries, ordered by 'id'
    """
    def to_country(entry):
        # Read both codes first, then build the object and fill it in.
        code = entry['id']
        code_iso2 = entry['iso2Code']
        parsed = Country(code)
        parsed.code_iso2 = code_iso2
        parsed.name = entry['name']
        return parsed

    data_list.sort(key=lambda entry: entry['id'])
    return [to_country(entry) for entry in data_list]
def parse_multiple_countries_alone(data_list):
    """
    Turn the answer to a countries-only query (no indicators) into Country
    objects.

    The incoming list is sorted in place on the 'id' key, then every record
    becomes one Country carrying its 'id', 'iso2Code' and 'name'.

    @param data_list: list of records with 'id', 'iso2Code' and 'name' keys
    @return: list of countries
    """
    data_list.sort(key=lambda record: record['id'])

    result = []
    for record in data_list:
        primary_code, short_code = record['id'], record['iso2Code']
        entry = Country(primary_code)
        entry.code_iso2 = short_code
        entry.name = record['name']
        result.append(entry)
    return result
def query_multiple_data(country_codes=['all'], indicator_codes=[], start_date=2010, end_date=2011):
    """
    Build country objects carrying the requested indicators, read from the
    module-level ``_data`` table.

    Each country code is upper-cased and looked up in ``_data``; each
    indicator code is then looked up in that country's entry. Only the years
    produced by ``np.arange(start_date, end_date)`` are kept, so end_date
    itself is excluded. Missing values and zeros are dropped.

    The default argument lists are only read, never modified.

    @param country_codes: list of country codes to fetch e.g. ['usa','bra']
    @param indicator_codes: list of indicator codes to fetch
    @param start_date: first year wanted (inclusive)
    @param end_date: year at which the wanted range stops (exclusive)
    @return: a list of country objects
    """
    # The year range does not depend on the country or indicator.
    wanted_years = np.arange(start_date, end_date)

    countries = []
    for country_code in country_codes:
        country_data = _data[str(country_code.upper())]
        country = Country(country_code)
        for indicator_code in indicator_codes:
            indicator_data_full = country_data[indicator_code]
            # Series.ix is gone from modern pandas. reindex() performs the
            # same label lookup and yields NaN for years that are absent,
            # which dropna() then discards.
            # NOTE(review): assumes the series is indexed by integer years -
            # confirm against how _data is built.
            indicator_data = indicator_data_full.reindex(wanted_years).dropna()
            #TODO: Figure out how to treat zeros in RCA data (for now I am treating them as missing values).
            indicator_data = indicator_data[indicator_data != 0]
            dates = list(indicator_data.index)
            values = list(indicator_data.values)
            indicator = Indicator(code=indicator_code, dates=dates, values=values,
                                  nominal_start=start_date, nominal_end=end_date)
            country.set_indicator(indicator)
        countries.append(country)
    return countries
def parse_multiple_countries(data_list, nominal_start=None, nominal_end=None):
    """
    Split a mixed list of indicator records into one Country per ISO2 code.

    The code is read from item['country']['id']. All records sharing a code
    are handed together to parse_single_country, and the indicator it returns
    is set on a new Country. Output order follows the first occurrence of
    each code in data_list. The input list is not modified.

    @param data_list: records, each exposing item['country']['id']
    @param nominal_start: what start year did the query ask for (not necessarily get)
    @param nominal_end: what end year did the query ask for (not necessarily get)
    @return: list of countries
    """
    # When it's a list of indicator the World bank returns ISO2 codes labeled 'id'
    buckets = {}
    ordered_codes = []
    # Group in a single pass. Re-filtering the remaining records for every
    # country is quadratic, and never terminates when filter() is lazy
    # because the exhausted iterator makes the removal step a no-op.
    for record in data_list:
        code = record['country']['id']
        if code not in buckets:
            ordered_codes.append(code)
        buckets.setdefault(code, []).append(record)

    countries = []
    for code in ordered_codes:
        # we now basically don't know the country's "real" (iso3) id
        country = Country("")
        country.code_iso2 = code
        country.set_indicator(
            parse_single_country(buckets[code], nominal_start, nominal_end))
        countries.append(country)
    return countries
def retreive(self, arg):
    """
    Checks the cache for data defined by arg and returns it if it's there

    Every country stored in self.db.countries is deserialized. For each
    wanted country, indicators that were not asked for are removed from the
    deserialized object, and the remaining ones must cover the requested
    year interval. Any missing country, indicator or year range makes the
    whole lookup a cache miss.

    @param arg: same as for Extractor.grab(); read here through the keys
        "country_codes", "indicator_codes", "start_date" and "end_date"
    @return: data specified by the arg or None if it's a cache miss
    """
    # deserialize what's in the DB
    countries_raw = list(self.db.countries.find())
    cached_countries = [Country.from_json(country) for country in countries_raw]

    # let's pick what we need from it
    target_countries = []
    contains_indicators = True
    contains_years = True
    wanted_countries = set(arg["country_codes"])
    for country in cached_countries:
        if country.code not in wanted_countries:
            continue
        # add the countries we need
        target_countries.append(country)
        wanted_countries.remove(country.code)

        # now for that country, let's go see if we have the indicators
        wanted_indicators = set(arg["indicator_codes"])
        # Iterate over a snapshot: unwanted indicators are popped from
        # country.indicators inside the loop, and changing a dict while
        # iterating its live view raises RuntimeError.
        for indicator in list(country.indicators.values()):
            if indicator.code not in wanted_indicators:
                # we don't even want that one
                country.indicators.pop(indicator.code)
                continue
            wanted_indicators.remove(indicator.code)
            # cool, it's here; now let's check for its years
            begin_date = indicator.nominal_start
            end_date = indicator.nominal_end
            interval_satisfied = (arg["start_date"] >= begin_date
                                  and arg["end_date"] <= end_date)
            if interval_satisfied:
                # keep only the years that were asked for
                # NOTE(review): the value returned by slice() is not stored
                # back on the country, so this only narrows the data if
                # slice() works in place - confirm.
                indicator.slice(arg["start_date"], arg["end_date"])
            else:
                contains_years = False
        if wanted_indicators:
            contains_indicators = False

    # anything left over was asked for but is not in the cache
    contains_countries = not wanted_countries

    # all 3 conditions must be true, no data can be missing
    if contains_countries and contains_indicators and contains_years:
        return target_countries
    # we have a cache miss
    return None
def test_add_indicators_to_countries(self):
    """
    Feed parser.add_indicators_to_countries two target countries plus two
    result lists (one per indicator code) and check that both countries are
    still there and that the first one exposes a two-value "ind1" indicator.
    """
    def build(code, iso2, indicator=None):
        # Country with its ISO2 code set and, optionally, one indicator.
        made = Country(code)
        made.code_iso2 = iso2
        if indicator is not None:
            made.set_indicator(indicator)
        return made

    # The countries the indicators are to be attached to.
    targets = [build("country1", "c1"), build("country2", "c2")]

    # Results for country "c1": one anonymous Country per indicator.
    first_for_c1 = Indicator("ind1", [1, 2], [10, 20])
    second_for_c1 = Indicator("ind2", [1, 2], [20, 30])
    c1_with_first = build("", "c1", first_for_c1)
    c1_with_second = build("", "c1", second_for_c1)

    # Results for country "c2".
    first_for_c2 = Indicator("ind1", [1, 2], [230, 240])
    second_for_c2 = Indicator("ind2", [1, 2], [330, 340])
    c2_with_first = build("", "c2", first_for_c2)
    c2_with_second = build("", "c2", second_for_c2)

    per_indicator_results = [
        [c1_with_first, c2_with_first],
        [c1_with_second, c2_with_second],
    ]

    parser.add_indicators_to_countries(targets, per_indicator_results)

    self.assertEqual(len(targets), 2)
    attached = targets[0].get_indicator("ind1")
    self.assertEqual(len(attached.values), 2)