Exemple #1
0
 def __init__(self):
     '''
     Initialise the bookkeeping state and create the data extractor.

     The item counter starts at zero and the "items already obtained"
     flag starts cleared.
     '''
     self._counter, self._got_items = 0, False
     self._extractor = Extractor()
Exemple #2
0
 def __init__(self, look_back_years):
     '''
     Store the look-back window and create the collaborating helpers.

     look_back_years - kept on the instance and also handed to Metadata.
     '''
     self.look_back_years = look_back_years
     # both sample sets start out empty
     self.crisis_samples, self.normal_samples = [], []
     # location of the sample selection file, taken from the configuration
     self.t_loc = conf.sample_selection_file
     # helpers, created in the same order as before
     self.extractor = Extractor()
     self.preprocessor = Preprocessor()
     self.metadata = Metadata(conf, look_back_years)
Exemple #3
0
class Test(unittest.TestCase):
    """Checks for the Extractor fetch methods."""

    def setUp(self):
        """Give every test method its own Extractor instance."""
        super(Test, self).setUp()
        self.extractor = Extractor()

    def test_fetch_data(self):
        """Fetching per configuration must yield at least one country."""
        fetched = self.extractor.fetch_data_per_conf(conf)
        number_of_countries = len(fetched)
        self.assertTrue(number_of_countries > 0)

    def test_fetch_indicator(self):
        """A single indicator fetch returns the expected values and dates."""
        first_year, last_year = 1998, 1999
        population = self.extractor.fetch_indicator(
            "hrv", "SP.POP.TOTL", first_year, last_year)
        expected_values = [4501000.0, 4554000.0]
        expected_dates = [1998, 1999]
        self.assertEqual(population.get_values(), expected_values)
        self.assertEqual(population.get_dates(), expected_dates)
Exemple #4
0
class Test(unittest.TestCase):
    """Checks for the Extractor fetch methods."""

    def setUp(self):
        """Give every test method its own Extractor instance."""
        super(Test, self).setUp()
        self.extractor = Extractor()

    def test_fetch_data(self):
        """Fetching per configuration must yield at least one country."""
        fetched = self.extractor.fetch_data_per_conf(conf)
        number_of_countries = len(fetched)
        self.assertTrue(number_of_countries > 0)

    def test_fetch_indicator(self):
        """A single indicator fetch returns the expected values and dates."""
        first_year, last_year = 1998, 1999
        population = self.extractor.fetch_indicator(
            "hrv", "SP.POP.TOTL", first_year, last_year)
        expected_values = [4501000.0, 4554000.0]
        expected_dates = [1998, 1999]
        self.assertEqual(population.get_values(), expected_values)
        self.assertEqual(population.get_dates(), expected_dates)
Exemple #5
0
 def __init__(self):
     '''
     Initialise the bookkeeping state and create the data extractor.

     The item counter starts at zero and the "items already obtained"
     flag starts cleared.
     '''
     self._counter, self._got_items = 0, False
     self._extractor = Extractor()
Exemple #6
0
 def __init__(self, look_back_years):
     '''
     Store the look-back window and create the collaborating helpers.

     look_back_years - kept on the instance and also handed to Metadata.
     '''
     self.look_back_years = look_back_years
     # both sample sets start out empty
     self.crisis_samples, self.normal_samples = [], []
     # location of the sample selection file, taken from the configuration
     self.t_loc = conf.sample_selection_file
     # helpers, created in the same order as before
     self.extractor = Extractor()
     self.preprocessor = Preprocessor()
     self.metadata = Metadata(conf, look_back_years)
Exemple #7
0
class SamplesSet(object):
    '''
    Responsible for building train and test sets.

    Samples are built per country from the indicator values in the
    years preceding each event year, labelled with the event class,
    and finally split into a train and a test list.
    '''

    def __init__(self, look_back_years):
        '''
        Constructor

        look_back_years - number of years before an event year that
        interesting_years_before() returns for feature extraction.
        '''
        self.t_loc = conf.sample_selection_file
        self.extractor = Extractor()
        self.look_back_years = look_back_years
        self.preprocessor = Preprocessor()
        # sample set placeholders
        self.crisis_samples = []
        self.normal_samples = []
        self.metadata = Metadata(conf, look_back_years)

    def interesting_years_before(self, target_year):
        """
        Return the look_back_years years that precede target_year
        (target_year itself is excluded).
        """
        return range(target_year - self.look_back_years, target_year)

    def assign_samples(self, indicators, event_years, event_class, country_code="?"):
        """
        Create machine learning samples from indicators.

        indicators - indicator objects handed to the preprocessor
        event_years - years of crises or normal periods
        (as specified in the sample selection file or in a rule)
        event_class - desired class corresponding to these years
        country_code - used, upper-cased, in the sample description

        Returns a list with one Sample per event year. Event years for
        which preprocessing raises NonExistentDataError are skipped.
        """
        samples = []
        for event_year in event_years:
            # select only interesting values from the indicator
            interesting_years = self.interesting_years_before(event_year)
            try:
                features = []
                for indicator in indicators:
                    new_features = self.preprocessor.preprocess_indicator(
                        indicator, interesting_years)
                    features.extend(new_features)
            except NonExistentDataError:
                # deliberate best effort: no sample for this event year
                continue
            sample_description = country_code.upper() + "-" + str(event_year)
            # named new_sample so the sample() function used by
            # divide_single is not shadowed here
            new_sample = Sample(features, event_class,
                                description=sample_description)
            samples.append(new_sample)
        return samples

    def convert_to_boundaries(self, event_years, look_back_years):
        """
        convert a list of event years and look back years into
        a list of 2-tuples of boundaries (begin_year, end_year)
        """
        return [(event_year - look_back_years, event_year - 1)
                for event_year in event_years]

    def events_to_boundaries(self, all_events, look_back_years):
        """
        Apply convert_to_boundaries to every value of the all_events
        dictionary and return a dictionary with the same keys.
        """
        event_boundaries = {}
        for key, value in all_events.items():
            event_boundaries[key] = self.convert_to_boundaries(value, look_back_years)
        return event_boundaries

    def combine_events(self, t_crises, t_normal):
        """
        Return a dictionary that maps every key of t_crises to the
        crisis years followed by the normal years stored under that key.

        Raises KeyError when a key of t_crises is missing in t_normal.
        """
        all_events = {}
        for key in t_crises:
            years = []
            years.extend(t_crises[key])
            years.extend(t_normal[key])
            all_events[key] = years
        return all_events

    def divide_single(self, samples, test_percentage):
        """
        Divide a list of samples into train and test samples.

        test_percentage - fraction of the samples that go to the test
        list; the number of test samples is rounded down.

        Returns (train_samples, test_samples). Both are new lists, so
        the caller may modify them without touching samples. The train
        samples keep their original order.
        """
        if test_percentage == 0:
            # return a copy: divide() extends the result in place and
            # must not grow the list that was passed in
            return list(samples), []
        number_test = int(len(samples) * test_percentage)
        # draw positions rather than objects, so duplicated or
        # unhashable samples are handled and order is preserved
        picked = sample(range(len(samples)), number_test)
        picked_positions = set(picked)
        test_samples = [samples[position] for position in picked]
        train_samples = [item for position, item in enumerate(samples)
                         if position not in picked_positions]
        return train_samples, test_samples

    def divide(self, crisis_samples, normal_samples, test_percentage):
        """
        Same as divide_single, only does that for both crisis and normal
        samples and combines them into single train and test lists.

        The result is also stored in self.train_samples and
        self.test_samples.
        """
        self.train_samples, self.test_samples = self.divide_single(crisis_samples, test_percentage)
        new_train_samples, new_test_samples = self.divide_single(normal_samples, test_percentage)
        self.train_samples.extend(new_train_samples)
        self.test_samples.extend(new_test_samples)
        return self.train_samples, self.test_samples

    def build_from_crises_file(self, country_codes, feature_indicators, test_percentage, sparse=True):
        """
        Entry method that builds a samples set by fetching the data using the extractor.
        Classes are determined from a crisis XLS file.

        sparse - if True it fetches the data for the necessary years only.

        Returns the (train_samples, test_samples) pair produced by divide().
        """
        # clear the sample sets
        self.crisis_samples = []
        self.normal_samples = []
        # get the years classified as crises / normal periods
        dates_input = Input()
        t_crises, t_normal = dates_input.parse_sample_selection(self.t_loc)
        # NOTE(review): the fetch boundaries below use conf.look_back_years
        # while assign_samples uses self.look_back_years - confirm that the
        # two are always meant to be equal.
        if sparse:
            # we fetch only what we need
            # all the events combined - important so that we can only download data near those years
            events = self.combine_events(t_crises, t_normal)
            event_boundaries = self.events_to_boundaries(events,
                                                         conf.look_back_years)
            countries = self.extractor.fetch_data_sparse(country_codes,
                                                         feature_indicators,
                                                         event_boundaries,
                                                         conf.wb_pause)
        else:
            # we fetch all the data first; the flat year lists are only
            # needed here, to compute the overall boundaries
            crises_list, normal_list = dates_input.parse_sample_selection_to_list(self.t_loc)
            start_date = min(min(crises_list), min(normal_list)) - conf.look_back_years
            end_date = max(max(crises_list), max(normal_list))
            countries = self.extractor.fetch_data(country_codes,
                                                  feature_indicators,
                                                  start_date,
                                                  end_date,
                                                  conf.wb_pause)
        # assign the samples
        for country in countries:
            # fetch all the indicators for target country
            indicators = [country.get_indicator(ind_code)
                          for ind_code in feature_indicators]
            # create samples from those indicators - in crises...
            crisis_years = t_crises[country.code]
            self.crisis_samples.extend(self.assign_samples(indicators,
                                                           crisis_years,
                                                           CRISIS_CLASS,
                                                           country.code))
            # ... and in normal periods
            normal_years = t_normal[country.code]
            self.normal_samples.extend(self.assign_samples(indicators,
                                                           normal_years,
                                                           NORMAL_CLASS,
                                                           country.code))
        return self.divide(self.crisis_samples, self.normal_samples, test_percentage)

    def build_by_condition(self, country_codes, indicators, feature_indicators, test_percentage):
        """
        Determine crises according to some condition/rule.

        Not implemented yet; always raises NotImplementedError.
        """
        # NotImplemented is a comparison sentinel, not an exception, so
        # raising it would fail with a TypeError instead
        raise NotImplementedError
Exemple #8
0
class IVisualisation(object):
    '''
    Visualiser interface.

    Subclasses must override _create_figure. create_all_figures() then
    draws either one figure per item or a single combined figure,
    depending on conf.combine_plots.
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self._extractor = Extractor()
        # number of items drawn so far
        self._counter = 0
        # set once _get_items() has fetched and processed the data
        self._got_items = False

    def _add_legend(self):
        """Add a legend to the current plot."""
        legend()

    def _should_add_meta_marks(self):
        """
        Tells if additional graph stuff (axis labels, legend)
        should be placed: always for separate plots, and only before
        the first item has been counted for a combined plot.
        @return: Boolean
        """
        return not conf.combine_plots or self._counter == 0

    def get_title(self):
        """
        @return: the automatically built title when conf.auto_title is
        set, conf.graph_title otherwise
        """
        if conf.auto_title:
            return self._auto_graph_title()
        else:
            return conf.graph_title

    def _auto_graph_title(self):
        """
        Build a title of the form "<ITEMS> - <conf.title_end>", where
        <ITEMS> is the upper-cased string form of all items (combined
        plot) or of the item at the current counter position.
        """
        if conf.combine_plots:
            country_representation = ", ".join(
                [str(item).upper() for item in self._get_items()])
        else:
            country_representation = str(
                self._get_items()[self._counter]).upper()
        title = "%s - %s" % (country_representation, conf.title_end)
        return title

    def _start_new_figure(self):
        """Open a new figure, remember it in self.figure and set its title."""
        self.figure = figure()
        # NOTE(review): hold() was deprecated in Matplotlib 2.0 and removed
        # in 3.0 - confirm which Matplotlib version this code targets.
        hold(True)
        suptitle(self.get_title(), fontsize=16)

    def _finish_figure(self):
        """
        Add the legend, then save or show the figure.

        When conf.write_to_file is set the figure is saved under
        conf.filename; for separate plots the lower-cased string form
        of the item just drawn is appended to the file name. Otherwise
        the plot is shown live: right away for a combined plot, or once
        the counter has reached the number of items.
        """
        self._add_legend()
        if conf.write_to_file:  # indeed write to file
            name, extension = os.path.splitext(conf.filename)
            extension = extension[1:]  # drop the leading dot
            if not conf.combine_plots:
                try:
                    # the counter was already incremented for this item
                    ending = str(self._get_items()[self._counter - 1]).lower()
                except AttributeError:  # except not needed
                    ending = str(self._counter).lower()
                name = name + "-" + ending
            self.figure.savefig(name + "." + extension, format=extension)
        elif conf.combine_plots or self._counter == len(self._get_items()):
            # we'll just plot it live in a new window
            show()

    def _get_items(self):
        """
        Get items that form independent units of data for
        drawing. Normally these are countries, but this can
        be overridden. Each item then gets passed to the
        _create_figure function to draw them on a graph.

        The data is fetched and processed on the first call only.
        @return: list of items
        """
        if not self._got_items:
            self._extractor.fetch_data(conf.countries, conf.indicators,
                                       conf.start_date, conf.end_date)
            self._extractor.process(conf.process_indicators,
                                    method="slope",
                                    look_back_years=conf.look_back_years)
            self._got_items = True
        countries = self._extractor.get_countries()
        return countries

    def _create_figure(self, item):
        """
        Create a figure and return it as a matplotlib object. Must override.
        """
        raise MustOverrideError

    def create_all_figures(self):
        """
        Write all figures to file(s) or plot in one or more windows.
        """
        # we create only one figure if this is a combo plot
        if conf.combine_plots:
            self._start_new_figure()
        # iterate through items (e.g. countries)
        for item in self._get_items():
            if not conf.combine_plots:
                self._start_new_figure()
            self._create_figure(item)
            # this counter is important for subclasses. Be careful!
            self._counter += 1
            if not conf.combine_plots:
                self._finish_figure()
        # store the plots in case this is a combined plot
        if conf.combine_plots:
            self._finish_figure()
Exemple #9
0
 def setUp(self):
     """Run the base-class set-up, then create the Extractor used by the tests."""
     unittest.TestCase.setUp(self)
     self.extractor = Extractor()
Exemple #10
0
 def setUp(self):
     """Run the base-class set-up, then create the Extractor used by the tests."""
     unittest.TestCase.setUp(self)
     self.extractor = Extractor()
Exemple #11
0
class SamplesSet(object):
    '''
    Responsible for building train and test sets.

    Samples are built per country from the indicator values in the
    years preceding each event year, labelled with the event class,
    and finally split into a train and a test list.
    '''
    def __init__(self, look_back_years):
        '''
        Constructor

        look_back_years - number of years before an event year that
        interesting_years_before() returns for feature extraction.
        '''
        self.t_loc = conf.sample_selection_file
        self.extractor = Extractor()
        self.look_back_years = look_back_years
        self.preprocessor = Preprocessor()
        # sample set placeholders
        self.crisis_samples = []
        self.normal_samples = []
        self.metadata = Metadata(conf, look_back_years)

    def interesting_years_before(self, target_year):
        """
        Return the look_back_years years that precede target_year
        (target_year itself is excluded).
        """
        return range(target_year - self.look_back_years, target_year)

    def assign_samples(self,
                       indicators,
                       event_years,
                       event_class,
                       country_code="?"):
        """
        Create machine learning samples from indicators.

        indicators - indicator objects handed to the preprocessor
        event_years - years of crises or normal periods
        (as specified in the sample selection file or in a rule)
        event_class - desired class corresponding to these years
        country_code - used, upper-cased, in the sample description

        Returns a list with one Sample per event year. Event years for
        which preprocessing raises NonExistentDataError are skipped.
        """
        samples = []
        for event_year in event_years:
            # select only interesting values from the indicator
            interesting_years = self.interesting_years_before(event_year)
            try:
                features = []
                for indicator in indicators:
                    new_features = self.preprocessor.preprocess_indicator(
                        indicator, interesting_years)
                    features.extend(new_features)
            except NonExistentDataError:
                # deliberate best effort: no sample for this event year
                continue
            sample_description = country_code.upper() + "-" + str(
                event_year)
            # named new_sample so the sample() function used by
            # divide_single is not shadowed here
            new_sample = Sample(features,
                                event_class,
                                description=sample_description)
            samples.append(new_sample)
        return samples

    def convert_to_boundaries(self, event_years, look_back_years):
        """
        convert a list of event years and look back years into
        a list of 2-tuples of boundaries (begin_year, end_year)
        """
        return [(event_year - look_back_years, event_year - 1)
                for event_year in event_years]

    def events_to_boundaries(self, all_events, look_back_years):
        """
        Apply convert_to_boundaries to every value of the all_events
        dictionary and return a dictionary with the same keys.
        """
        event_boundaries = {}
        for key, value in all_events.items():
            event_boundaries[key] = self.convert_to_boundaries(
                value, look_back_years)
        return event_boundaries

    def combine_events(self, t_crises, t_normal):
        """
        Return a dictionary that maps every key of t_crises to the
        crisis years followed by the normal years stored under that key.

        Raises KeyError when a key of t_crises is missing in t_normal.
        """
        all_events = {}
        for key in t_crises:
            years = []
            years.extend(t_crises[key])
            years.extend(t_normal[key])
            all_events[key] = years
        return all_events

    def divide_single(self, samples, test_percentage):
        """
        Divide a list of samples into train and test samples.

        test_percentage - fraction of the samples that go to the test
        list; the number of test samples is rounded down.

        Returns (train_samples, test_samples). Both are new lists, so
        the caller may modify them without touching samples. The train
        samples keep their original order.
        """
        if test_percentage == 0:
            # return a copy: divide() extends the result in place and
            # must not grow the list that was passed in
            return list(samples), []
        number_test = int(len(samples) * test_percentage)
        # draw positions rather than objects, so duplicated or
        # unhashable samples are handled and order is preserved
        picked = sample(range(len(samples)), number_test)
        picked_positions = set(picked)
        test_samples = [samples[position] for position in picked]
        train_samples = [
            item for position, item in enumerate(samples)
            if position not in picked_positions
        ]
        return train_samples, test_samples

    def divide(self, crisis_samples, normal_samples, test_percentage):
        """
        Same as divide_single, only does that for both crisis and normal
        samples and combines them into single train and test lists.

        The result is also stored in self.train_samples and
        self.test_samples.
        """
        self.train_samples, self.test_samples = self.divide_single(
            crisis_samples, test_percentage)
        new_train_samples, new_test_samples = self.divide_single(
            normal_samples, test_percentage)
        self.train_samples.extend(new_train_samples)
        self.test_samples.extend(new_test_samples)
        return self.train_samples, self.test_samples

    def build_from_crises_file(self,
                               country_codes,
                               feature_indicators,
                               test_percentage,
                               sparse=True):
        """
        Entry method that builds a samples set by fetching the data using the extractor.
        Classes are determined from a crisis XLS file.

        sparse - if True it fetches the data for the necessary years only.

        Returns the (train_samples, test_samples) pair produced by divide().
        """
        # clear the sample sets
        self.crisis_samples = []
        self.normal_samples = []
        # get the years classified as crises / normal periods
        dates_input = Input()
        t_crises, t_normal = dates_input.parse_sample_selection(self.t_loc)
        # NOTE(review): the fetch boundaries below use conf.look_back_years
        # while assign_samples uses self.look_back_years - confirm that the
        # two are always meant to be equal.
        if sparse:
            # we fetch only what we need
            # all the events combined - important so that we can only download data near those years
            events = self.combine_events(t_crises, t_normal)
            event_boundaries = self.events_to_boundaries(
                events, conf.look_back_years)
            countries = self.extractor.fetch_data_sparse(
                country_codes, feature_indicators, event_boundaries,
                conf.wb_pause)
        else:
            # we fetch all the data first; the flat year lists are only
            # needed here, to compute the overall boundaries
            crises_list, normal_list = dates_input.parse_sample_selection_to_list(
                self.t_loc)
            start_date = min(min(crises_list),
                             min(normal_list)) - conf.look_back_years
            end_date = max(max(crises_list), max(normal_list))
            countries = self.extractor.fetch_data(country_codes,
                                                  feature_indicators,
                                                  start_date, end_date,
                                                  conf.wb_pause)
        # assign the samples
        for country in countries:
            # fetch all the indicators for target country
            indicators = [
                country.get_indicator(ind_code)
                for ind_code in feature_indicators
            ]
            # create samples from those indicators - in crises...
            crisis_years = t_crises[country.code]
            self.crisis_samples.extend(
                self.assign_samples(indicators, crisis_years, CRISIS_CLASS,
                                    country.code))
            # ... and in normal periods
            normal_years = t_normal[country.code]
            self.normal_samples.extend(
                self.assign_samples(indicators, normal_years, NORMAL_CLASS,
                                    country.code))
        return self.divide(self.crisis_samples, self.normal_samples,
                           test_percentage)

    def build_by_condition(self, country_codes, indicators, feature_indicators,
                           test_percentage):
        """
        Determine crises according to some condition/rule.

        Not implemented yet; always raises NotImplementedError.
        """
        # NotImplemented is a comparison sentinel, not an exception, so
        # raising it would fail with a TypeError instead
        raise NotImplementedError
Exemple #12
0
class IVisualisation(object):
    '''
    Visualiser interface.

    Subclasses must override _create_figure. create_all_figures() then
    draws either one figure per item or a single combined figure,
    depending on conf.combine_plots.
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self._extractor = Extractor()
        # number of items drawn so far
        self._counter = 0
        # set once _get_items() has fetched and processed the data
        self._got_items = False

    def _add_legend(self):
        """Add a legend to the current plot."""
        legend()

    def _should_add_meta_marks(self):
        """
        Tells if additional graph stuff (axis labels, legend)
        should be placed: always for separate plots, and only before
        the first item has been counted for a combined plot.
        @return: Boolean
        """
        return not conf.combine_plots or self._counter == 0

    def get_title(self):
        """
        @return: the automatically built title when conf.auto_title is
        set, conf.graph_title otherwise
        """
        if conf.auto_title:
            return self._auto_graph_title()
        else:
            return conf.graph_title

    def _auto_graph_title(self):
        """
        Build a title of the form "<ITEMS> - <conf.title_end>", where
        <ITEMS> is the upper-cased string form of all items (combined
        plot) or of the item at the current counter position.
        """
        if conf.combine_plots:
            country_representation = ", ".join([str(item).upper() for item in self._get_items()])
        else:
            country_representation = str(self._get_items()[self._counter]).upper()
        title = "%s - %s" % (country_representation, conf.title_end)
        return title

    def _start_new_figure(self):
        """Open a new figure, remember it in self.figure and set its title."""
        self.figure = figure()
        # NOTE(review): hold() was deprecated in Matplotlib 2.0 and removed
        # in 3.0 - confirm which Matplotlib version this code targets.
        hold(True)
        suptitle(self.get_title(), fontsize=16)

    def _finish_figure(self):
        """
        Add the legend, then save or show the figure.

        When conf.write_to_file is set the figure is saved under
        conf.filename; for separate plots the lower-cased string form
        of the item just drawn is appended to the file name. Otherwise
        the plot is shown live: right away for a combined plot, or once
        the counter has reached the number of items.
        """
        self._add_legend()
        if conf.write_to_file:  # indeed write to file
            name, extension = os.path.splitext(conf.filename)
            extension = extension[1:]  # drop the leading dot
            if not conf.combine_plots:
                try:
                    # the counter was already incremented for this item
                    ending = str(self._get_items()[self._counter-1]).lower()
                except AttributeError:  # except not needed
                    ending = str(self._counter).lower()
                name = name + "-" + ending
            self.figure.savefig(name + "." + extension, format=extension)
        elif conf.combine_plots or self._counter == len(self._get_items()):
            # we'll just plot it live in a new window
            show()

    def _get_items(self):
        """
        Get items that form independent units of data for
        drawing. Normally these are countries, but this can
        be overridden. Each item then gets passed to the
        _create_figure function to draw them on a graph.

        The data is fetched and processed on the first call only.
        @return: list of items
        """
        if not self._got_items:
            self._extractor.fetch_data(conf.countries, conf.indicators, conf.start_date, conf.end_date)
            self._extractor.process(conf.process_indicators,
                                    method="slope",
                                    look_back_years=conf.look_back_years)
            self._got_items = True
        countries = self._extractor.get_countries()
        return countries

    def _create_figure(self, item):
        """
        Create a figure and return it as a matplotlib object. Must override.
        """
        raise MustOverrideError

    def create_all_figures(self):
        """
        Write all figures to file(s) or plot in one or more windows.
        """
        # we create only one figure if this is a combo plot
        if conf.combine_plots:
            self._start_new_figure()
        # iterate through items (e.g. countries)
        for item in self._get_items():
            if not conf.combine_plots:
                self._start_new_figure()
            self._create_figure(item)
            # this counter is important for subclasses. Be careful!
            self._counter += 1
            if not conf.combine_plots:
                self._finish_figure()
        # store the plots in case this is a combined plot
        if conf.combine_plots:
            self._finish_figure()