def test_parse_sample_selection(self):
    """
    parse_sample_selection on the test spreadsheet must yield the
    expected per-country crisis years and normal years.
    """
    parser = Input()
    crises_dates, normal_dates = parser.parse_sample_selection(
        "./foc/forecaster/tests/test_sample_selection.xls")

    # a country without crises is still listed, with an empty list
    self.assertEqual(crises_dates, {"USA": [2009, 2001], "DEU": []})
    self.assertEqual(
        normal_dates,
        {"USA": [1994, 1995, 1996], "DEU": [1994, 1995]})
def __init__(self, db_location):
    '''
    Parse the sample selection found at db_location and keep both
    results on the instance as self.crises and self.normals.
    '''
    self.input = Input()
    parsed_selection = self.input.parse_sample_selection(db_location)
    self.crises, self.normals = parsed_selection
class CrisisSeer(object):
    '''
    Knows when a crisis occurred in a certain country
    (based on the crisis database)
    '''

    def __init__(self, db_location):
        '''
        Parse the sample selection found at db_location and keep both
        results on the instance as self.crises and self.normals.
        '''
        self.input = Input()
        parsed_selection = self.input.parse_sample_selection(db_location)
        self.crises, self.normals = parsed_selection

    def get_crisis_years(self, country_code):
        """
        Return the crisis years stored for country_code, or an empty
        list when there is no entry for that country.
        """
        try:
            return self.crises[country_code]
        except KeyError:
            # no entry means no noted crises, so the country is
            # treated as crisis-free
            return []
class CrisisSeer(object):
    '''
    Knows when a crisis occurred in a certain country
    (based on the crisis database)
    '''

    def __init__(self, db_location):
        '''
        Parse the sample selection found at db_location and keep both
        results on the instance as self.crises and self.normals.
        '''
        self.input = Input()
        parsed_selection = self.input.parse_sample_selection(db_location)
        self.crises, self.normals = parsed_selection

    def get_crisis_years(self, country_code):
        """
        Return the crisis years stored for country_code, or an empty
        list when there is no entry for that country.
        """
        try:
            return self.crises[country_code]
        except KeyError:
            # no entry means no noted crises, so the country is
            # treated as crisis-free
            return []
def build_from_crises_file(self, country_codes, feature_indicators,
                           test_percentage):
    """
    Entry method that builds the crisis and normal sample sets from data
    fetched through the extractor, then splits them with self.divide.
    Classes are determined from the crisis XLS file at self.t_loc.

    country_codes - country codes to use; when the first element is
        "EVERYTHING" it is replaced by the sorted codes that appear both
        in the extractor's "countries" metadata and in the sample
        selection.
    feature_indicators - indicator codes fetched for every country.
    test_percentage - handed unchanged to self.divide.

    Returns the result of self.divide.
    """
    # start from empty sample sets
    self.crisis_samples = []
    self.normal_samples = []

    # years classified as crises / normal periods, per country and
    # as the two collections used for the overall date boundaries
    selection = Input()
    t_crises, t_normal = selection.parse_sample_selection(self.t_loc)
    crises_list, normal_list = selection.parse_sample_selection_to_list(
        self.t_loc)

    if country_codes[0] == "EVERYTHING":
        # take everything available in the samples set
        wb_countries = self.extractor.grab_metadata("countries")
        known_codes = {country.code for country in wb_countries}
        selected_codes = set(t_crises) | set(t_normal)
        country_codes = sorted(known_codes & selected_codes)

    # one fetch for the whole interval; the lower boundary is moved
    # back by conf.look_back_years
    start_date = (min(min(crises_list), min(normal_list))
                  - conf.look_back_years)
    end_date = max(max(crises_list), max(normal_list))

    arg = self.extractor.arg()
    arg["country_codes"] = country_codes
    arg["indicator_codes"] = feature_indicators
    arg["interval"] = (start_date, end_date)
    arg["pause"] = conf.wb_pause
    countries = self.extractor.grab(arg)

    if self.cache_enabled:
        if self.extractor.was_cached():
            print("Cache was hit, didn't have to query the World Bank API.")
        else:
            print("Data wasn't cached, queried the World Bank API.")

    # assign the samples
    for country in countries:
        # all the requested indicators of this country
        indicators = [country.get_indicator(ind_code)
                      for ind_code in feature_indicators]

        # samples in crises; a country without a crises entry is
        # skipped entirely (its normal periods are not used either)
        try:
            crisis_years = t_crises[country.code]
        except KeyError:
            continue
        crisis_batch = self.assign_samples(indicators, crisis_years,
                                           CRISIS_CLASS, country.code)
        self.crisis_samples.extend(crisis_batch)

        # samples in normal periods
        # NOTE(review): this lookup is not guarded like the crises one;
        # a country missing from t_normal raises KeyError here - confirm
        # both mappings always share the same keys.
        normal_years = t_normal[country.code]
        normal_batch = self.assign_samples(indicators, normal_years,
                                           NORMAL_CLASS, country.code)
        self.normal_samples.extend(normal_batch)

    return self.divide(self.crisis_samples, self.normal_samples,
                       test_percentage)
def build_from_crises_file(self, country_codes, feature_indicators,
                           test_percentage):
    """
    Entry method that builds the crisis and normal sample sets from data
    fetched through the extractor, then splits them with self.divide.
    Classes are determined from the crisis XLS file at self.t_loc.

    country_codes - country codes to use; when the first element is
        "EVERYTHING" it is replaced by the sorted codes that appear both
        in the extractor's "countries" metadata and in the sample
        selection.
    feature_indicators - indicator codes fetched for every country.
    test_percentage - handed unchanged to self.divide.

    Returns the result of self.divide.
    """
    # start from empty sample sets
    self.crisis_samples = []
    self.normal_samples = []

    # years classified as crises / normal periods, per country and
    # as the two collections used for the overall date boundaries
    selection = Input()
    t_crises, t_normal = selection.parse_sample_selection(self.t_loc)
    crises_list, normal_list = selection.parse_sample_selection_to_list(
        self.t_loc)

    if country_codes[0] == "EVERYTHING":
        # take everything available in the samples set
        wb_countries = self.extractor.grab_metadata("countries")
        known_codes = {country.code for country in wb_countries}
        selected_codes = set(t_crises) | set(t_normal)
        country_codes = sorted(known_codes & selected_codes)

    # one fetch for the whole interval; the lower boundary is moved
    # back by conf.look_back_years
    start_date = (min(min(crises_list), min(normal_list))
                  - conf.look_back_years)
    end_date = max(max(crises_list), max(normal_list))

    arg = self.extractor.arg()
    arg["country_codes"] = country_codes
    arg["indicator_codes"] = feature_indicators
    arg["interval"] = (start_date, end_date)
    arg["pause"] = conf.wb_pause
    countries = self.extractor.grab(arg)

    if self.cache_enabled:
        if self.extractor.was_cached():
            print("Cache was hit, didn't have to query the World Bank API.")
        else:
            print("Data wasn't cached, queried the World Bank API.")

    # assign the samples
    for country in countries:
        # all the requested indicators of this country
        indicators = [country.get_indicator(ind_code)
                      for ind_code in feature_indicators]

        # samples in crises; a country without a crises entry is
        # skipped entirely (its normal periods are not used either)
        try:
            crisis_years = t_crises[country.code]
        except KeyError:
            continue
        crisis_batch = self.assign_samples(indicators, crisis_years,
                                           CRISIS_CLASS, country.code)
        self.crisis_samples.extend(crisis_batch)

        # samples in normal periods
        # NOTE(review): this lookup is not guarded like the crises one;
        # a country missing from t_normal raises KeyError here - confirm
        # both mappings always share the same keys.
        normal_years = t_normal[country.code]
        normal_batch = self.assign_samples(indicators, normal_years,
                                           NORMAL_CLASS, country.code)
        self.normal_samples.extend(normal_batch)

    return self.divide(self.crisis_samples, self.normal_samples,
                       test_percentage)
def __init__(self, db_location):
    '''
    Parse the sample selection found at db_location and keep both
    results on the instance as self.crises and self.normals.
    '''
    self.input = Input()
    parsed_selection = self.input.parse_sample_selection(db_location)
    self.crises, self.normals = parsed_selection