Esempio n. 1
0
    def test_load_table_names(self):
        """
        Unit test: `_load_table_names` lists the DB directory once, reads
        no table content and stores the table names with `None` values.
        """
        # arrange
        listdir_mock = MagicMock(
            return_value=["labada.csv", "macarena.csv"])

        driver_mock = MagicMock()
        driver_mock.db_directory = self.directory
        driver_mock.limit = None
        driver_mock._DbDriver__read_only = True
        driver_mock._DbDriver__tables = {}

        # act
        with patch("pkwscraper.lib.dbdriver.os.listdir", listdir_mock):
            DbDriver._load_table_names(driver_mock)

        # assert - the directory was listed exactly once
        listdir_mock.assert_called_once_with(self.directory)

        # assert - none of the loading methods was used
        loaders = (driver_mock._load_excel,
                   driver_mock._load_csv,
                   driver_mock._load)
        for loader in loaders:
            loader.assert_not_called()

        # assert - names are registered without any table data
        expected_tables = {"labada": None, "macarena": None}
        self.assertDictEqual(driver_mock._DbDriver__tables, expected_tables)
Esempio n. 2
0
    def test_init_not_exists(self):
        """
        Check creating `DbDriver` on a directory that does not exist:
        read-only opening raises `IOError`, opening for writing sets the
        fields and leaves the directory present on disk.
        """
        target_dir = self.directory

        # read only and not existing directory - error expected
        with self.assertRaises(IOError):
            DbDriver(db_directory=target_dir, read_only=True)

        # opening for writing - check fields and the directory
        driver = DbDriver(db_directory=target_dir, limit=50)
        self.assertEqual(driver.db_directory, target_dir)
        self.assertEqual(driver.limit, 50)
        self.assertFalse(driver._DbDriver__read_only)
        self.assertListEqual(driver._DbDriver__dropped_tables, [])
        self.assertDictEqual(driver._DbDriver__tables, {})
        self.assertTrue(os.path.exists(target_dir))

        # clean up
        os.rmdir(target_dir)
Esempio n. 3
0
 def _load_db(self):
     """
     Open the preprocessed DB as `self.source_db`, building it first
     when it cannot be opened.

     If the preprocessed DB cannot be opened, the rescribed DB is
     probed; if that one cannot be opened either, scraping is run.
     Preprocessing is then run in both of these cases.
     """
     def _can_open(directory):
         # a DB counts as present when read-only opening does not fail
         try:
             DbDriver(directory, read_only=True)
         except IOError:
             return False
         return True

     if not _can_open(self.elections.preprocessed_dir):
         if not _can_open(self.elections.rescribed_dir):
             # no rescribed db, run downloading and scraping
             self._scrape()
         # rescribed db is available now, run preprocessing
         self._preprocess()

     # preprocessed db is available now, load it
     self.source_db = DbDriver(
         self.elections.preprocessed_dir, read_only=True)
Esempio n. 4
0
def main():
    """
    Run `Controller` for each granularity level, first for the whole
    country and then limited to the mazovian voivodship.

    Output filenames are `winners_<name>.png` and
    `mazovia_winners_<name>.png` respectively.
    """
    levels = [
        ("communes", "1comm"),
        ("districts", "2distr"),
        ("constituencies", "3const"),
        ("voivodships", "4voivod"),
    ]

    def outlines_for(gran):
        # voivodships use their own outlines, other levels constituencies
        if gran == "voivodships":
            return "voivodships"
        return "constituencies"

    # whole country
    for gran, name in levels:
        print(f"processing {gran}...")
        country_ctrl = Controller(
            ("Sejm", 2015), function, colormap,
            granularity=gran,
            outlines_granularity=outlines_for(gran),
            normalization=False,
            output_filename=f"winners_{name}.png",
        )
        country_ctrl.run()

    # only mazovian voivodship
    db = DbDriver(SEJM_2015_DATA_DIRECTORY, read_only=True)
    mazovian_id = db["województwa"].find_one(
        {"name": "MAZOWIECKIE"}, fields="_id")
    mazovia = ("voivodships", mazovian_id)
    for gran, name in levels:
        print(f"processing {gran}...")
        mazovia_ctrl = Controller(
            ("Sejm", 2015), function, colormap,
            granularity=gran,
            unit=mazovia,
            outlines_granularity=outlines_for(gran),
            normalization=False,
            output_filename=f"mazovia_winners_{name}.png",
        )
        mazovia_ctrl.run()
Esempio n. 5
0
    def test_load_csv(self):
        """
        Check that both synthetic CSV files load as frames with
        2 columns, 3 rows and an index named `_id`.
        """
        # arrange
        self._make_synthetic_data()

        # act
        frames = [DbDriver._load_csv(path)
                  for path in (self.path_1, self.path_2)]

        # assert
        for frame in frames:
            self.assertEqual(len(frame.columns), 2)
            self.assertEqual(len(frame), 3)
            self.assertEqual(frame.index.name, "_id")

        # clean up
        self._clean_synthetic_data()
Esempio n. 6
0
    def __init__(self, source_db=None, target_db=None):
        """
        Store the source and target DB drivers.

        source_db: `DbDriver` opened as read only, or `None` to open
            `RESCRIBED_DATA_DIRECTORY` as read only.
        target_db: `DbDriver` opened for writing, or `None` to open
            `PREPROCESSED_DATA_DIRECTORY`.

        Raises `TypeError` if an argument is not a `DbDriver` and
        `RuntimeError` if its read-only mode does not fit its role.
        """
        # source db
        source = (DbDriver(RESCRIBED_DATA_DIRECTORY, read_only=True)
                  if source_db is None else source_db)
        if not isinstance(source, DbDriver):
            raise TypeError("Please pass an instance of `DbDriver` or `None`.")
        if not source.read_only:
            raise RuntimeError(
                "Please pass `DbDriver` for read only or `None`.")
        self.source_db = source

        # target db - the default one is opened only after source is valid
        target = (DbDriver(PREPROCESSED_DATA_DIRECTORY)
                  if target_db is None else target_db)
        if not isinstance(target, DbDriver):
            raise TypeError("Please pass an instance of `DbDriver` or `None`.")
        if target.read_only:
            raise RuntimeError("Please pass `DbDriver` for writing or `None`.")
        self.target_db = target
Esempio n. 7
0
    def test_read_only(self):
        """
        Unit test: `read_only` follows the private flag, and calling
        its value raises `TypeError`.
        """
        driver = DbDriver.__new__(DbDriver)

        checks = ((True, self.assertTrue), (False, self.assertFalse))
        for flag, check in checks:
            driver._DbDriver__read_only = flag
            check(driver.read_only)

        with self.assertRaises(TypeError):
            driver.read_only()
Esempio n. 8
0
    def test_get_item(self):
        """
        Unit test: indexing by an unknown name raises `KeyError`, and an
        already stored table is returned without calling `_load_table`.
        """
        stored_table = MagicMock()
        driver = DbDriver.__new__(DbDriver)
        driver._DbDriver__tables = {"MyTable": stored_table}
        driver._load_table = MagicMock()

        # unknown table name
        with self.assertRaises(KeyError):
            driver["NotExistingTable"]

        # known table that is already stored
        fetched = driver["MyTable"]
        driver._load_table.assert_not_called()
        self.assertIs(fetched, stored_table)
Esempio n. 9
0
    def test_create_table(self):
        """
        Unit test: `create_table` stores the new `Table` under its name
        and removes that name from the dropped tables list.
        """
        # arrange
        name = "labada"
        created_table = MagicMock()
        table_class_mock = MagicMock(return_value=created_table)

        driver_mock = MagicMock()
        driver_mock._DbDriver__read_only = False
        driver_mock._DbDriver__tables = {}
        driver_mock._DbDriver__dropped_tables = [name, "other"]

        # act
        with patch("pkwscraper.lib.dbdriver.Table", table_class_mock):
            DbDriver.create_table(driver_mock, name)

        # assert
        self.assertDictEqual(
            driver_mock._DbDriver__tables, {name: created_table})
        self.assertListEqual(
            driver_mock._DbDriver__dropped_tables, ["other"])
Esempio n. 10
0
    def test_dump_tables(self):
        """
        Unit test: `dump_tables` removes the file of a dropped table
        when it exists, writes the loaded table to CSV and empties the
        dropped tables list.
        """
        # arrange
        old_path = "./here/old_table.csv"
        missing_path = "./here/missing_table.csv"
        new_path = "./here/new_table.csv"

        frame_mock = MagicMock()
        table_mock = MagicMock()
        table_mock.to_df.return_value = frame_mock

        driver_mock = MagicMock()
        driver_mock._DbDriver__read_only = False
        driver_mock._DbDriver__tables = {
            "new_table": table_mock,
            "not_changed_table": None,
        }
        driver_mock._DbDriver__dropped_tables = [
            "old_table", "missing_table"]
        driver_mock._filepath.side_effect = [
            old_path, missing_path, new_path]

        # only the first dropped table has its file on disk
        os_mock = MagicMock()
        os_mock.path.exists.side_effect = [True, False]

        # act
        with patch("pkwscraper.lib.dbdriver.os", os_mock):
            DbDriver.dump_tables(driver_mock)

        # assert
        driver_mock._filepath.assert_has_calls([
            call("old_table"),
            call("missing_table"),
            call("new_table"),
        ])

        os_mock.path.exists.assert_has_calls(
            [call(old_path), call(missing_path)])
        os_mock.remove.assert_called_once_with(old_path)

        frame_mock.to_csv.assert_called_once_with(new_path, sep=";")

        self.assertListEqual(driver_mock._DbDriver__dropped_tables, [])
Esempio n. 11
0
 def test_get_item_not_loaded(self):
     """
     Unit test: indexing a table stored as `None` calls `_load_table`
     with its name and returns the result.
     """
     # arrange
     loaded_table = MagicMock()
     driver = DbDriver.__new__(DbDriver)
     driver._DbDriver__tables = {"MyTable": None}
     driver._load_table = MagicMock(return_value=loaded_table)
     # act
     fetched = driver["MyTable"]
     # assert
     driver._load_table.assert_called_once_with("MyTable")
     self.assertIs(fetched, loaded_table)
Esempio n. 12
0
    def test_delete_table(self):
        """
        Unit test: `delete_table` removes the table from the tables
        dict and records its name in the dropped tables list.
        """
        # arrange
        dropped_name = "labada"
        kept_table = MagicMock()

        driver_mock = MagicMock()
        driver_mock._DbDriver__read_only = False
        driver_mock._DbDriver__tables = {
            dropped_name: MagicMock(),
            "other": kept_table,
        }
        driver_mock._DbDriver__dropped_tables = []

        # act
        DbDriver.delete_table(driver_mock, dropped_name)

        # assert
        self.assertDictEqual(
            driver_mock._DbDriver__tables, {"other": kept_table})
        self.assertListEqual(
            driver_mock._DbDriver__dropped_tables, [dropped_name])
Esempio n. 13
0
 def test_filepath(self):
     """
     Unit test: `_filepath` joins the DB directory with the table
     name and the `.csv` extension.
     """
     # arrange
     directory = "./some_directory"
     driver_mock = MagicMock()
     driver_mock.db_directory = directory
     # the result is accepted with either path separator
     accepted = [directory + separator + "table_name.csv"
                 for separator in ("\\", "/")]
     # act
     path = DbDriver._filepath(driver_mock, "table_name")
     # assert
     self.assertIn(path, accepted)
Esempio n. 14
0
    def test_init_exists(self):
        """
        Check opening an existing DB: tables are first registered with
        `None` values and hold the expected records after being accessed.
        """
        expected_first = {
            101: {'num': 9, 'char': 'a'},
            102: {'num': 16, 'char': 'b'},
            103: {'num': 25, 'char': 'c'},
        }
        expected_second = {
            0: {'num': 36, 'char': 'd'},
            1: {'num': 49, 'char': 'e'},
            2: {'num': 64, 'char': 'f'},
        }

        # create some synthetic tables data
        self._make_synthetic_data()

        # test tables data before accessing the tables
        driver = DbDriver(db_directory=self.directory, read_only=True)
        self.assertEqual(len(driver._DbDriver__tables), 2)
        self.assertDictEqual(
            driver._DbDriver__tables,
            {"first_table": None, "second_table": None})

        # access both tables
        driver["first_table"]
        driver["second_table"]

        # test records of the accessed tables
        self.assertDictEqual(
            driver._DbDriver__tables["first_table"]._Table__data,
            expected_first)
        self.assertDictEqual(
            driver._DbDriver__tables["second_table"]._Table__data,
            expected_second)

        # clean up
        self._clean_synthetic_data()
Esempio n. 15
0
    def __init__(self, db=None):
        """
        Create the downloader and store the DB driver used for writing.

        db: `DbDriver` opened for writing, or `None` to open
            `RESCRIBED_DATA_DIRECTORY`.

        Raises `TypeError` if `db` is not a `DbDriver` and
        `RuntimeError` if it is read only.
        """
        # create downloader object
        self.dl = Downloader(year=2015, directory=RAW_DATA_DIRECTORY)

        # open db for rescribing
        target = DbDriver(RESCRIBED_DATA_DIRECTORY) if db is None else db
        if not isinstance(target, DbDriver):
            raise TypeError("Please pass an instance of `DbDriver` or `None`.")
        if target.read_only:
            raise RuntimeError(
                "Please pass `DbDriver` for writing or `None`.")
        self.db = target

        # for checking
        self.all_votes = 0
Esempio n. 16
0
    def test_load_table(self):
        """
        Unit test: `_load_table` reads the CSV file of the table, builds
        a `Table` from the frame, stores it and returns it.
        """
        # arrange
        name = "labada"
        csv_path = os.path.join(self.directory, "labada.csv")

        frame_mock = MagicMock()
        driver_mock = MagicMock()
        driver_mock.db_directory = self.directory
        driver_mock._load_csv.return_value = frame_mock
        driver_mock._filepath.return_value = csv_path
        driver_mock.limit = None
        driver_mock._DbDriver__read_only = True
        driver_mock._DbDriver__tables = {}

        built_table = MagicMock()
        table_class_mock = MagicMock()
        table_class_mock.from_df.return_value = built_table

        getsize_mock = MagicMock(return_value=1000)

        # act
        with patch("pkwscraper.lib.dbdriver.Table", table_class_mock), \
                patch("pkwscraper.lib.dbdriver.os.path.getsize",
                      getsize_mock):
            loaded = DbDriver._load_table(driver_mock, name)

        # assert - file access
        driver_mock._filepath.assert_called_once_with(name)
        driver_mock._load_excel.assert_not_called()
        driver_mock._load_csv.assert_called_once_with(csv_path)
        getsize_mock.assert_called_once_with(csv_path)

        # assert - table creation
        table_class_mock.from_df.assert_called_once_with(
            frame_mock, limit=None, read_only=True)

        # assert - stored and returned table
        self.assertDictEqual(
            driver_mock._DbDriver__tables, {"labada": built_table})
        self.assertIs(loaded, built_table)
Esempio n. 17
0
def main():
    """
    Show regions from four territorial tables of the Sejm 2015 DB,
    each drawn with a random color.
    """
    # open DB
    db = DbDriver(SEJM_2015_DATA_DIRECTORY, read_only=True)

    def colormap(x):
        # the value is ignored - random RGB with fixed 0.4 as 4th component
        return [random.random() for _ in range(3)] + [0.4]

    # choose units to visualize
    regions = []
    for table_name in ("gminy", "powiaty", "województwa", "okręgi"):
        for geo in db[table_name].find({}, fields="geo"):
            regions.append(Region.from_json(geo))

    # prepare values - one zero per region
    values = [0] * len(regions)

    # make visualizer
    vis = Visualizer(regions, values, colormap)
    vis.render_colors()
    vis.prepare()
    vis.show()
Esempio n. 18
0
    def test_init_nested_directory(self):
        """
        Check that a DB can be created and dumped in a directory nested
        several levels below the test directory.
        """
        nested_dir = self.directory + "other/directory/level/"

        # prepare DB
        driver = DbDriver(db_directory=nested_dir)
        driver.create_table("test")
        driver.dump_tables()

        # assert
        self.assertEqual(driver.db_directory, nested_dir)
        for expected_path in (nested_dir, nested_dir + "test.csv"):
            self.assertTrue(os.path.exists(expected_path))

        # clean up
        shutil.rmtree(self.directory)

        self.assertFalse(os.path.exists(nested_dir))
Esempio n. 19
0
def main():
    """
    Print the number of votes and mandates obtained by candidates
    grouped by their party, sorted by votes in descending order.

    Candidates whose party field starts with the "does not belong to
    any party" phrase are grouped together under `None`.
    """
    # open DB
    db = DbDriver(SEJM_2015_DATA_DIRECTORY, read_only=True)

    # get party membership/support for each candidate
    candidates = db["kandydaci"].find(
        {"is_crossed_out": False}, fields=["_id", "party"])

    # replace the "no party" declarations with `None` in place
    no_party_prefix = "nie należy do partii politycznej"
    for candidate in candidates:
        if candidate[1].startswith(no_party_prefix):
            candidate[1] = None

    candidate_to_party = {
        candidate_id: party_name for candidate_id, party_name in candidates}

    # prepare data dicts
    party_list = list({party_name for _, party_name in candidates})
    party_results = {}
    for party_name in party_list:
        party_results[party_name] = {"mandates": 0, "votes": 0}

    # mandate winners
    for winner_id in db["mandaty"].find({}, fields="candidate"):
        party_results[candidate_to_party[winner_id]]["mandates"] += 1

    # determine results table names
    table_names = [
        f"wyniki_{const_no}"
        for const_no in db["okręgi"].find({}, fields="number")
    ]

    # iterate over constituencies - columns other than these hold votes
    non_candidate_columns = ("_id", "obwod", "candidates_count")
    for table_name in table_names:
        for record in db[table_name].find({}).values():
            for column in record:
                if column not in non_candidate_columns:
                    party_name = candidate_to_party[column]
                    party_results[party_name]["votes"] += int(record[column])

    # sort results
    def votes_of(party_name):
        return party_results[party_name]["votes"]

    sorted_parties = sorted(party_results, key=votes_of, reverse=True)

    # present results - column widths come from the longest values
    name_width = max(len(name) for name in party_list if name)
    votes_width = len(str(max(
        stats["votes"] for stats in party_results.values())))
    mandates_width = len(str(max(
        stats["mandates"] for stats in party_results.values())))
    first_spaces = name_width + votes_width + 4
    second_spaces = mandates_width + 3

    print("Kandydatki i kandydaci następujących partii uzyskały:")
    print()
    for party_name in sorted_parties:
        stats = party_results[party_name]
        mandates = stats["mandates"]
        votes = stats["votes"]
        label = "bezpartyjne/i" if party_name is None else f'"{party_name}"'
        gap_1 = " " * (first_spaces - len(label) - len(str(votes)))
        gap_2 = " " * (second_spaces - len(str(mandates)))
        print(f'{label}:{gap_1}{votes} głosów'
              f'{gap_2}{mandates} mandatów')
Esempio n. 20
0
    def test_whole(self):
        """
        Main integration test: create a DB with one table, dump it,
        open it again, compare the records and delete the whole DB.
        """
        # arrange
        record_1 = {"a": 1, "b": 2}
        record_2 = {"c": 8, "b": 4}

        # make and save db
        db = DbDriver(db_directory=self.directory)
        db.create_table("my_table")
        id_1 = db["my_table"].put(record_1)
        id_2 = db["my_table"].put(record_2)
        db.dump_tables()

        # open the db again
        db2 = DbDriver(db_directory=self.directory)
        my_table = db2["my_table"]
        records = my_table.find({})
        self.assertEqual(len(records), 2)
        self.assertDictEqual(records[id_1], record_1)
        self.assertDictEqual(records[id_2], record_2)

        # remove db
        # NOTE(review): the access code is sliced out of the value returned
        # by `get_deleting_access` at fixed positions 43-52 - presumably
        # a message with the code embedded; confirm against `DbDriver`.
        deleting_access_code = db2.get_deleting_access()
        deleting_access_code = deleting_access_code[43:53]
        db2.delete(deleting_access_code)

        # use the unittest assertion instead of a bare `assert`, so the
        # check is not stripped when Python runs with `-O`
        self.assertFalse(os.path.exists(self.directory))
Esempio n. 21
0
 def test_delete(self):
     """
     Check that `delete` called with the access code removes the
     DB directory.
     """
     # arrange
     self._make_synthetic_data()
     self.assertTrue(os.path.exists(self.directory))
     driver = DbDriver(db_directory=self.directory)
     driver.create_table("foo")
     driver.dump_tables()
     driver.delete_table("foo")
     # act - the code is sliced out of the returned value
     access_code = driver.get_deleting_access()[43:53]
     driver.delete(access_code)
     # assert
     self.assertFalse(os.path.exists(self.directory))
Esempio n. 22
0
    def _split_db(self):
        """
        Yield a separate `DbDriver` instance for each unit of analysis.

        Each yielded instance holds copies of only those source DB
        records that `DbReferences` relates to the given unit, so the
        function passed by the user can use all the data it receives
        without reaching data of other units. The instance is marked
        as read only before it is yielded.
        """
        # prepare indexes
        refs = DbReferences(self.source_db, self.granularity)

        # prepare units list
        if self.unit_granularity is not None:
            # check if unit is correctly set
            self.source_db[self.unit_granularity][self.unit_id]
            units = refs.get_relation(
                _from=self.unit_granularity,
                _to=self.granularity,
                _id=self.unit_id,
            )
        else:
            units = self.source_db[self.granularity].find({})

        # make DB driver instance for each unit
        for unit_id in units:
            # get IDs of records in tables
            ids_by_table = {
                "gminy": refs.get_gmina(unit_id),
                "powiaty": refs.get_powiat(unit_id),
                "okręgi": refs.get_okreg(unit_id),
                "województwa": refs.get_voivodship(unit_id),
                "obwody": refs.get_obwod(unit_id),
                "protokoły": refs.get_protocole(unit_id),
                "listy": refs.get_list(unit_id),
                "kandydaci": refs.get_candidate(unit_id),
                "mandaty": refs.get_mandate(unit_id),
            }
            ids_by_table.update(refs.get_wyniki(unit_id))

            # create db driver instance without running `__init__`
            unit_db = DbDriver.__new__(DbDriver)
            unit_db._DbDriver__read_only = False
            unit_db._DbDriver__tables = {}
            unit_db._DbDriver__dropped_tables = []

            # copy records, keeping their original IDs
            for table_name, record_ids in ids_by_table.items():
                unit_db.create_table(table_name)
                for record_id in record_ids:
                    source_record = self.source_db[table_name][record_id]
                    unit_db[table_name].put(
                        dict(source_record), _id=record_id)

            # freeze db and conclude iteration
            unit_db._DbDriver__read_only = True
            yield unit_db
Esempio n. 23
0
    def test_read_only_errors(self):
        """
        Check that every modifying operation on a read-only DB raises
        `IOError` with the expected message.
        """
        # arrange
        self._make_synthetic_data()
        db = DbDriver(self.directory, read_only=True)

        db_message = "DB is for reading only."
        table_message = "Table is for read only."
        forbidden_actions = (
            # saving to harddrive
            (lambda: db.dump_tables(), db_message),
            # adding table
            (lambda: db.create_table("test_table"), db_message),
            # putting records
            (lambda: db["first_table"].put({"c": 5}), table_message),
            # dropping table
            (lambda: db.delete_table("first_table"), db_message),
            # obtaining deleting access code
            (lambda: db.get_deleting_access(), db_message),
            # deleting db
            (lambda: db.delete(access_code="something"), db_message),
        )

        # act and assert
        for action, expected_message in forbidden_actions:
            with self.assertRaises(IOError) as caught:
                action()
            self.assertEqual(caught.exception.args[0], expected_message)

        # clean up
        self._clean_synthetic_data()