def __init__(self):
    '''
    Constructor.

    Creates the Extractor instance used by this object and leaves the
    visualisation data slot empty.
    '''
    self._extractor = Extractor()

    # Placeholder; will later hold a dictionary with the necessary data.
    self.vis_data = None
def test_cache(self):
    # Checks that a grab populates the cache, that a cache hit requires the
    # countries, the interval and the indicators to all match, and that
    # grabbing a wider set does not duplicate cached countries.
    # (Kept as a comment rather than a docstring so the name unittest
    # reports for this test is unchanged.)

    # host = "localhost"
    host = "lis.irb.hr"
    extractor = Extractor()
    # Enable the cache, but point it at a test DB and start from empty.
    extractor.enable_cache(host, 27017, test=True)
    extractor.clear_cache()

    # Grab some data; only the caching side effect matters here, so the
    # returned countries are deliberately not kept.
    arg = extractor.arg()
    arg["country_codes"] = ["hrv", "usa"]
    arg["interval"] = (1997, 1999)
    arg["indicator_codes"] = ["SP.POP.TOTL"]
    extractor.grab(arg)

    # The exact request that was just grabbed must be a cache hit.
    self.assertEqual(extractor.is_cached(arg), True,
                     "Countries must be cached after grab")

    # An extra country turns it into a miss.
    arg["country_codes"].append("fin")
    self.assertEqual(extractor.is_cached(arg), False,
                     "Countries must match to give a cache hit")

    # Original countries, but a wider interval: still a miss.
    arg["country_codes"] = ["hrv", "usa"]
    arg["interval"] = (1996, 1999)
    self.assertEqual(extractor.is_cached(arg), False,
                     "Years must match to give a cache hit")

    # Original interval, but an extra indicator: still a miss.
    arg["interval"] = (1997, 1999)
    arg["indicator_codes"].append("FR.INR.RINR")
    self.assertEqual(extractor.is_cached(arg), False,
                     "Indicators must match to give a cache hit")

    # Grab the wider set and make sure the two countries are not stored
    # twice. Count by iterating the cursor without building a list.
    extractor.grab(arg)
    country_count = sum(1 for _ in extractor._cacher.db.countries.find())
    self.assertEqual(country_count, 2,
                     "Grabing a wider set must not leave duplicates!")
def __init__(self):
    '''
    Constructor.

    Resets the internal counter and the "got items" flag, creates the
    Extractor and, when conf.cache_enabled is set, enables its cache
    using conf.cache_host and conf.cache_port.
    '''
    self._counter, self._got_items = 0, False

    # Initialize default configuration options.
    # TODO: IVisualisation shouldn't use the extractor at all, but rely on
    # a data organiser.
    self._extractor = Extractor()
    if not conf.cache_enabled:
        return
    self._extractor.enable_cache(conf.cache_host, conf.cache_port)
def __init__(self, look_back_years, cache_enabled=False,
             cache_host="localhost", cache_port=27017):
    '''
    Constructor.

    @param look_back_years: stored on the instance and passed on to
                            Metadata
    @param cache_enabled: when true, the extractor's cache is enabled
    @param cache_host: host passed to Extractor.enable_cache (only used
                       when cache_enabled is true)
    @param cache_port: port passed to Extractor.enable_cache (only used
                       when cache_enabled is true)
    '''
    self.t_loc = conf.sample_selection_file

    # Data source, optionally backed by a cache.
    self.extractor = Extractor()
    self.cache_enabled = cache_enabled
    if self.cache_enabled:
        self.extractor.enable_cache(cache_host, cache_port)

    self.look_back_years = look_back_years
    self.preprocessor = Preprocessor()

    # Sample set placeholders (two distinct, initially empty lists).
    self.crisis_samples, self.normal_samples = [], []

    self.metadata = Metadata(conf, look_back_years)
def test_extractor(self):
    # Exercises Extractor.grab with no argument, with an explicit
    # multi-country / multi-indicator request, and with a narrow request
    # whose indicator values and dates are checked exactly.
    # (Kept as a comment rather than a docstring so the name unittest
    # reports for this test is unchanged.)
    extractor = Extractor()

    # A grab without arguments must return something.
    countries = extractor.grab()
    self.assertGreater(len(countries), 0)

    # An explicit request must return something as well.
    arg = extractor.arg()
    arg["country_codes"] = ["usa", "hrv"]
    arg["indicator_codes"] = ["SP.POP.TOTL", "SL.TLF.PART.MA.ZS"]
    arg["interval"] = (2005, 2006)
    countries = extractor.grab(arg)
    self.assertGreater(len(countries), 0)

    # Narrow request: one country, one indicator, two years, compared
    # against fixed expected values.
    arg = extractor.arg()
    arg["country_codes"] = ["hrv"]
    arg["indicator_codes"] = ["SP.POP.TOTL"]
    arg["interval"] = (1998, 1999)
    countries = extractor.grab(arg)
    indicator = countries[0].get_indicator("SP.POP.TOTL")
    self.assertEqual(indicator.values, [4501000.0, 4554000.0])
    self.assertEqual(indicator.dates, [1998, 1999])
# <nbformat>3.0</nbformat>

# <markdowncell>

# Reading data
# ===========
# In this part we'll get the crisis years from a published IMF data set.
#
# First let's get the standard country codes corresponding to these countries using dracula.

# <codecell>

import inspect
from dracula.extractor import Extractor
import dracula

extractor = Extractor()
countries = extractor.grab_metadata("countries")

# Debugging aid: print the source of the parser function.
print(inspect.getsourcelines(dracula.wb.parser.parse_multiple_countries_alone))

# Map each country's name to its code.
country_codes = {}
for country in countries:
    # print(dir(country))
    country_codes[country.name] = country.code
print(country_codes)

# <markdowncell>

# Manual fixing

# <codecell>

country_codes["Serbia, Republic of"] = 'SRB'