예제 #1
0
 def test_directories(self):
     """Data directories exposed by Elections match the expected paths."""
     elections = Elections("Sejm", "2015")
     # read every directory property first, then compare them in order
     observed = (
         elections.raw_dir,
         elections.rescribed_dir,
         elections.preprocessed_dir,
         elections.visualized_dir,
     )
     expected = (
         RAW_DATA_DIRECTORY,
         RESCRIBED_DATA_DIRECTORY,
         PREPROCESSED_DATA_DIRECTORY,
         "./pkwscraper/data/sejm/2015/visualized/",
     )
     for actual_dir, expected_dir in zip(observed, expected):
         self.assertEqual(actual_dir, expected_dir)
예제 #2
0
 def test_init(self):
     """Constructor stores type and year and sets all private fields."""
     elections = Elections("Sejm", "2015")
     # values expected after construction from ("Sejm", "2015")
     self.assertEqual(elections._Elections__elections_type, "sejm")
     self.assertEqual(elections._Elections__elections_year, 2015)
     # the remaining private attributes only have to exist; a missing
     # one surfaces as AttributeError
     for private_name in (
             "_Elections__base_url",
             "_Elections__raw_dir",
             "_Elections__rescribed_dir",
             "_Elections__preprocessed_dir",
             "_Elections__visualized_dir",
             "_Elections__ScraperClass",
             "_Elections__PreprocessingClass",
     ):
         getattr(elections, private_name)
예제 #3
0
 def test_get_preprocessing_class(self):
     """Sejm 2015 elections resolve to the Sejm2015Preprocessing class."""
     elections = Elections("Sejm", "2015")
     self.assertIs(
         elections.get_preprocessing_class(), Sejm2015Preprocessing)
예제 #4
0
 def test_get_scraper_class(self):
     """Sejm 2015 elections resolve to the Sejm2015Scraper class."""
     elections = Elections("Sejm", "2015")
     self.assertIs(elections.get_scraper_class(), Sejm2015Scraper)
예제 #5
0
 def test_year(self):
     """The `year` property returns the year as an integer."""
     elections = Elections("Sejm", "2015")
     self.assertEqual(elections.year, 2015)
예제 #6
0
 def test_election_type(self):
     """The `election_type` property returns the lower-cased type."""
     elections = Elections("Sejm", "2015")
     self.assertEqual(elections.election_type, "sejm")
예제 #7
0
 def test_base_url_path(self):
     """The `base_url` property points at the 2015 parliament site."""
     elections = Elections("Sejm", "2015")
     expected_url = "https://parlament2015.pkw.gov.pl"
     self.assertEqual(elections.base_url, expected_url)
예제 #8
0
 def test_wrong_elections(self):
     """Constructing Elections for ("sejm", 2017) raises ValueError."""
     self.assertRaises(ValueError, Elections, "sejm", 2017)
예제 #9
0
 def test_not_implemented_elections(self):
     """Sejm elections of 2019 and "2023" raise NotImplementedError."""
     # the year is given once as int and once as str, as in the original
     for unsupported_year in (2019, "2023"):
         self.assertRaises(
             NotImplementedError, Elections, "sejm", unsupported_year)
예제 #10
0
 def test_wrong_elections_year(self):
     """A year that is not a number raises ValueError."""
     self.assertRaises(ValueError, Elections, "sejm", "mcmlxxv")
예제 #11
0
 def test_wrong_elections_type(self):
     """An unknown elections type raises ValueError."""
     self.assertRaises(
         ValueError, Elections, "Rada Polityki Pieniężnej", 2015)
예제 #12
0
    def __init__(self,
                 elections,
                 function,
                 colormap,
                 granularity,
                 unit=None,
                 outlines_granularity=None,
                 normalization=True,
                 title=None,
                 show_legend=False,
                 show_grid=False,
                 output_filename=None,
                 interpolation='linear'):
        """
        Validate the arguments and store them as instance attributes.

        Parameters:
        elections: (str, int) - pair of elections type and year that
            identifies the elections; it is passed on to `Elections`,
        function: callable - evaluates the data of a single unit,
        colormap: callable - function or object turning the numerical
            values returned by `function` into colors,
        granularity: str - level of territorial units the plot is
            divided into,
        unit: (str, ID) or None - restricts the analysis to one unit,
            given as the name of its granularity and its ID (the ID has
            to be known beforehand); None means the whole country,
        outlines_granularity - level of territorial units whose borders
            are drawn as contours on top of the plot,
        normalization: bool - whether the values of all units are
            scaled to the (0,1) range before reaching the colormap,
        title: str - title placed over the plot,
        show_legend: bool - whether the color key is shown as a legend,
            which can carry the extreme values next to it,
        show_grid: bool - whether the frame around the units plot is
            shown,
        output_filename: str or None - None displays the result in
            a new window; otherwise the plot is rendered to an image
            file of that name in the default visualizing directory,
        interpolation: str - method of interpolating colors in the
            colormap.

        Raises:
        ValueError - when any of the granularity arguments is not
            a recognized level,
        TypeError - when `elections` is not a 2-element tuple.
        """
        # split the optional unit pair into its two components
        unit_granularity, unit_id = (None, None) if unit is None else unit

        # map alternative (English) names onto the canonical ones; values
        # that are not keys of the dictionary pass through unchanged
        granularity = GRANULARITY_DICT.get(granularity, granularity)
        outlines_granularity = GRANULARITY_DICT.get(
            outlines_granularity, outlines_granularity)
        unit_granularity = GRANULARITY_DICT.get(
            unit_granularity, unit_granularity)

        # validate the granularity levels against the canonical names
        allowed_levels = GRANULARITY_DICT.values()
        expected_levels = ('should be one of: "voivodships", '
                           '"constituencies", "districts" or "communes"')

        if granularity not in allowed_levels:
            raise ValueError('`granularity` ' + expected_levels)

        if outlines_granularity not in allowed_levels:
            raise ValueError('`outlines_granularity` ' + expected_levels)

        if not (unit_granularity is None
                or unit_granularity in allowed_levels):
            raise ValueError('`unit` first part ' + expected_levels)

        # the elections identifier has to be a (type, year) tuple
        if not (isinstance(elections, tuple) and len(elections) == 2):
            raise TypeError(
                "Please, provide elections identifier: (type, year).")

        # store the configuration on the instance
        self.elections = Elections(
            elections_type=elections[0], year=elections[1])
        self.function = function
        self.colormap = colormap
        self.granularity = granularity
        self.unit_granularity = unit_granularity
        self.unit_id = unit_id
        self.outlines_granularity = outlines_granularity
        self.normalization = normalization
        self.title = title
        self.show_legend = show_legend
        self.show_grid = show_grid
        self.output_filename = output_filename
        self.interpolation = interpolation

        # filled in later: the visualizer object and the source DB
        self.vis = None
        self.source_db = None
예제 #13
0
class Controller:
    """
    This is the main class of the project, that calls all steps
    of data processing. This is created by passing main parameters
    and, most importantly, evaluating function. This renders a plot
    to image or for showing in separate window.

    Typical use is to construct the object and call `run`, which
    loads (or first builds) the preprocessed DB and then renders
    the plot.
    """
    def __init__(self,
                 elections,
                 function,
                 colormap,
                 granularity,
                 unit=None,
                 outlines_granularity=None,
                 normalization=True,
                 title=None,
                 show_legend=False,
                 show_grid=False,
                 output_filename=None,
                 interpolation='linear'):
        """
        Constructor does basic checks and creates class attributes.

        elections: (str, int) - type and year (unambiguous identifier)
            of elections,
        function: callable - function to evaluate data for single unit,
        colormap: callable - function or object that converts numerical
            values returned by function to proper colors,
        granularity: str - the level of territorial units that plot
            will be split into,
        unit: (str, ID) or None - the unit to which analysis will be
            limited; it is the pair of name of granularity, and then
            the ID of specific unit (that means it has to be determined
            earlier outside the class); if None - the plot is made for
            the whole country,
        outlines_granularity - level of territorial units that borders
            will be placed on top of plot as contours; NOTE: despite
            the `None` default, the value is validated against the
            known granularity levels, so a level has to be given
            (unless `None` is itself one of the values in
            `GRANULARITY_DICT`),
        normalization: bool - whether or not values from all units
            should be scaled to (0,1) range before passing to colormap,
        title: str - title of plot that is placed over the plot,
        show_legend: bool - whether or not to show the color key in
            form of legend, can contain extreme values written next
            to it,
        show_grid: bool - whether or not to show the frame around
            the units plot,
        output_filename: str or None - if None - the result will be
            displayed in new window, otherwise, it will be rendered to
            image file saved to given filename in default visualizing
            directory,
        interpolation: str - method of interpolation of colors in the
            colormap.

        Raises ValueError when one of the granularity arguments is not
        a recognized level, and TypeError when `elections` is not
        a 2-element tuple.
        """
        # unpack unit
        if unit is None:
            unit_granularity = None
            unit_id = None
        else:
            unit_granularity, unit_id = unit

        # translate English variants of arguments
        if granularity in GRANULARITY_DICT:
            granularity = GRANULARITY_DICT[granularity]
        if outlines_granularity in GRANULARITY_DICT:
            outlines_granularity = GRANULARITY_DICT[outlines_granularity]
        if unit_granularity in GRANULARITY_DICT:
            unit_granularity = GRANULARITY_DICT[unit_granularity]

        # basic correctness checks
        if granularity not in GRANULARITY_DICT.values():
            raise ValueError('`granularity` should be one of: "voivodships", '
                             '"constituencies", "districts" or "communes"')

        if outlines_granularity not in GRANULARITY_DICT.values():
            raise ValueError(
                '`outlines_granularity` should be one of: "voivodships", '
                '"constituencies", "districts" or "communes"')

        if unit_granularity is not None \
           and unit_granularity not in GRANULARITY_DICT.values():
            raise ValueError(
                '`unit` first part should be one of: "voivodships", '
                '"constituencies", "districts" or "communes"')

        if not isinstance(elections, tuple) or len(elections) != 2:
            raise TypeError(
                "Please, provide elections identifier: (type, year).")

        # assign arguments
        elections_type, year = elections
        self.elections = Elections(elections_type=elections_type, year=year)
        self.function = function
        self.colormap = colormap
        self.granularity = granularity
        self.unit_granularity = unit_granularity
        self.unit_id = unit_id
        self.outlines_granularity = outlines_granularity
        self.normalization = normalization
        self.title = title
        self.show_legend = show_legend
        self.show_grid = show_grid
        self.output_filename = output_filename
        self.interpolation = interpolation
        # set later by `_visualize` and `_load_db` respectively
        self.vis = None
        self.source_db = None

    def _scrape(self):
        """ Instantiate the scraper class of the elections and run it. """
        _ScraperClass = self.elections.get_scraper_class()
        scraper = _ScraperClass()
        scraper.run_all()

    def _preprocess(self):
        """
        Instantiate the preprocessing class of the elections and
        run it.
        """
        _PreprocessingClass = self.elections.get_preprocessing_class()
        preprocessing = _PreprocessingClass()
        preprocessing.run_all()

    def _load_db(self):
        """
        Load the preprocessed DB into `self.source_db`.

        If the preprocessed DB cannot be opened, preprocessing is run
        first; if the rescribed DB cannot be opened either, scraping
        is run before preprocessing.
        """
        try:
            # try opening preprocessed db
            DbDriver(self.elections.preprocessed_dir, read_only=True)
        except IOError:
            try:
                # preprocessed db cannot be opened, check if there is rescribed db
                DbDriver(self.elections.rescribed_dir, read_only=True)
            except IOError:
                # rescribed db cannot be opened, run downloading and scraping
                self._scrape()
            # rescribed db present, run preprocessing
            self._preprocess()
        # preprocessed db present, load it
        self.source_db = DbDriver(self.elections.preprocessed_dir,
                                  read_only=True)

    def _split_db(self):
        """
        This is used to split data in DB to correspond only to the
        single unit of analysis. Function passed by user can use all
        the DB instance data given to it, and be sure that they are
        isolated from data corresponding to other units.

        This is a generator: it yields one `DbDriver` instance per
        unit, each marked read-only after its records are copied.
        """
        # prepare indexes
        db_refs = DbReferences(self.source_db, self.granularity)

        # prepare units list
        if self.unit_granularity is None:
            units = self.source_db[self.granularity].find({})
        else:
            # check if unit is correctly set; the lookup result is
            # discarded - presumably it raises for an unknown ID
            self.source_db[self.unit_granularity][self.unit_id]
            units = db_refs.get_relation(
                _from=self.unit_granularity,
                _to=self.granularity,
                _id=self.unit_id,
            )

        # make DB driver instance for each unit
        for unit_id in units:
            # get IDs of records in tables
            gmina_ids = db_refs.get_gmina(unit_id)
            powiat_ids = db_refs.get_powiat(unit_id)
            okreg_ids = db_refs.get_okreg(unit_id)
            voivodship_ids = db_refs.get_voivodship(unit_id)
            obwody_ids = db_refs.get_obwod(unit_id)
            protocole_ids = db_refs.get_protocole(unit_id)
            list_ids = db_refs.get_list(unit_id)
            candidate_ids = db_refs.get_candidate(unit_id)
            mandate_ids = db_refs.get_mandate(unit_id)
            wyniki_ids = db_refs.get_wyniki(unit_id)

            tables_and_ids = {
                "gminy": gmina_ids,
                "powiaty": powiat_ids,
                "okręgi": okreg_ids,
                "województwa": voivodship_ids,
                "obwody": obwody_ids,
                "protokoły": protocole_ids,
                "listy": list_ids,
                "kandydaci": candidate_ids,
                "mandaty": mandate_ids
            }
            # `wyniki_ids` is merged as-is, so it is presumably already
            # a mapping of table names to IDs - TODO confirm
            tables_and_ids.update(wyniki_ids)

            # create db driver instance; `__init__` is bypassed and the
            # private fields are set by hand instead
            db = DbDriver.__new__(DbDriver)
            db._DbDriver__read_only = False
            db._DbDriver__tables = {}
            db._DbDriver__dropped_tables = []

            # copy records
            for table_name, ids_list in tables_and_ids.items():
                db.create_table(table_name)
                for _id in ids_list:
                    record = self.source_db[table_name][_id]
                    db[table_name].put(dict(record), _id=_id)

            # freeze db and conclude iteration
            db._DbDriver__read_only = True
            yield db

    def _visualize(self):
        """
        Evaluate the user function for every unit and render the plot.

        The plot is saved to `output_filename` inside the visualized
        directory of the elections when a filename is set, otherwise
        it is shown. The created `Visualizer` is kept in `self.vis`.
        """
        # split db into units
        dbs = self._split_db()

        # process data
        regions = []
        values = []

        for db in dbs:
            # make region
            geo = db[self.granularity].find_one({}, fields="geo")
            region = Region.from_json(geo)
            regions.append(region)

            # evaluate value
            value = self.function(db)
            values.append(value)

        # determine outline units
        outline_geos = self.source_db[self.outlines_granularity].find(
            {}, fields="geo")
        outline_regions = [Region.from_json(geo) for geo in outline_geos]

        # make visualizer object
        self.vis = Visualizer(regions,
                              values,
                              self.colormap,
                              contours=outline_regions,
                              interpolation=self.interpolation,
                              title=self.title,
                              color_legend=self.show_legend,
                              grid=self.show_grid)

        # normalize values if set
        if self.normalization:
            self.vis.normalize_values()

        # apply colormap to values
        self.vis.render_colors()

        # prepare plot
        self.vis.prepare()

        ### TODO # add title, legend, grid, values, etc.

        # render plot to window or file
        if self.output_filename:
            output_path = self.elections.visualized_dir + self.output_filename
            # create the directory the image lands in, which may be
            # a sub-directory named in `output_filename`; `exist_ok`
            # avoids failing when the directory is already there
            image_dir = os.path.dirname(output_path)
            if image_dir:
                os.makedirs(image_dir, exist_ok=True)
            self.vis.save_image(output_path)
        else:
            self.vis.show()

    def run(self):
        """
        Run prepared analysis object. It first makes sure the DB is
        ready to use, or loads it and possibly runs preprocessing/etc.
        """
        self._load_db()
        self._visualize()

    def show_db_schema(self):
        """
        Show tables and fields in DB as user guide.

        Not implemented yet - always raises NotImplementedError.
        """
        # TODO - intended result is a mapping of tables to their
        # columns and the kind of values each column holds
        raise NotImplementedError("TODO")