def test_load_table_names(self):
    """
    Unit test of `DbDriver._load_table_names`.

    With the directory listing patched to return two CSV file names,
    the method is expected to list the DB directory exactly once, call
    none of the loaders and map each table name (file name without the
    `.csv` extension) to `None`.
    """
    # arrange - fake directory listing and a mocked driver instance
    csv_files = ["labada.csv", "macarena.csv"]
    listdir_mock = MagicMock(return_value=csv_files)

    driver = MagicMock()
    driver.db_directory = self.directory
    driver.limit = None
    driver._DbDriver__read_only = True
    driver._DbDriver__tables = {}

    # act - run the real method against the mocked instance
    with patch("pkwscraper.lib.dbdriver.os.listdir", listdir_mock):
        DbDriver._load_table_names(driver)

    # assert - directory listed once, no loader touched
    listdir_mock.assert_called_once_with(self.directory)
    for loader in (driver._load_excel, driver._load_csv, driver._load):
        loader.assert_not_called()

    expected_tables = {"labada": None, "macarena": None}
    self.assertDictEqual(driver._DbDriver__tables, expected_tables)
def test_init_not_exists(self):
    """
    Check `DbDriver` construction for a directory that does not exist.

    Opening for reading only must raise `IOError`; opening for writing
    must set the public fields, start with empty private collections
    and create the directory on disk.
    """
    # read-only access to a missing directory is an error
    self.assertRaises(
        IOError, DbDriver, db_directory=self.directory, read_only=True)

    # writable access sets the fields and creates the directory
    driver = DbDriver(db_directory=self.directory, limit=50)
    self.assertEqual(driver.db_directory, self.directory)
    self.assertEqual(driver.limit, 50)
    self.assertFalse(driver._DbDriver__read_only)
    self.assertListEqual(driver._DbDriver__dropped_tables, [])
    self.assertDictEqual(driver._DbDriver__tables, {})
    self.assertTrue(os.path.exists(self.directory))

    # clean up - the directory is expected to be empty at this point
    os.rmdir(self.directory)
def _load_db(self):
    """
    Make sure the preprocessed DB exists and open it as `self.source_db`.

    The preprocessed DB is probed first. If it cannot be opened
    (`IOError`), the rescribed DB is probed; if that fails as well,
    `self._scrape()` is run. In both of these cases `self._preprocess()`
    follows. Finally the preprocessed DB is opened for reading only.
    """
    def open_read_only(directory):
        # every DB access in this method is read only
        return DbDriver(directory, read_only=True)

    try:
        # probe the preprocessed db; the opened instance is discarded
        open_read_only(self.elections.preprocessed_dir)
    except IOError:
        try:
            # no preprocessed db - probe the rescribed one
            open_read_only(self.elections.rescribed_dir)
        except IOError:
            # no rescribed db either - download and scrape first
            self._scrape()
        # rescribed db is present now, preprocess it
        self._preprocess()

    # preprocessed db is present, load it
    self.source_db = open_read_only(self.elections.preprocessed_dir)
def main():
    """
    Render winner maps for four granularities, twice.

    The first pass runs a `Controller` per granularity without a `unit`
    argument and writes `winners_<name>.png`. The second pass restricts
    the data to the voivodship named "MAZOWIECKIE" and writes
    `mazovia_winners_<name>.png`.
    """
    # (granularity, file name suffix) pairs, in processing order
    levels = [
        ("communes", "1comm"),
        ("districts", "2distr"),
        ("constituencies", "3const"),
        ("voivodships", "4voivod"),
    ]

    def outlines_for(gran):
        # voivodship maps get voivodship outlines, all others get
        # constituency outlines
        if gran == "voivodships":
            return "voivodships"
        return "constituencies"

    # whole country
    for gran, name in levels:
        print(f"processing {gran}...")
        out_gran = outlines_for(gran)
        Controller(
            ("Sejm", 2015), function, colormap,
            granularity=gran,
            outlines_granularity=out_gran,
            normalization=False,
            output_filename=f"winners_{name}.png",
        ).run()

    # only mazovian voivodship - look up its ID first
    db = DbDriver(SEJM_2015_DATA_DIRECTORY, read_only=True)
    mazovian_id = db["województwa"].find_one(
        {"name": "MAZOWIECKIE"}, fields="_id")

    for gran, name in levels:
        print(f"processing {gran}...")
        out_gran = outlines_for(gran)
        Controller(
            ("Sejm", 2015), function, colormap,
            granularity=gran,
            unit=("voivodships", mazovian_id),
            outlines_granularity=out_gran,
            normalization=False,
            output_filename=f"mazovia_winners_{name}.png",
        ).run()
def test_load_csv(self):
    """
    Check `DbDriver._load_csv` on both synthetic CSV files.

    Each loaded object must have 2 columns, 3 rows and an index
    named `_id`.
    """
    # arrange
    self._make_synthetic_data()

    # act - load both files before asserting anything
    loaded = [
        DbDriver._load_csv(path) for path in (self.path_1, self.path_2)
    ]

    # assert - the same expectations hold for both files
    for frame in loaded:
        self.assertEqual(len(frame.columns), 2)
        self.assertEqual(len(frame), 3)
        self.assertEqual(frame.index.name, "_id")

    # clean up
    self._clean_synthetic_data()
def __init__(self, source_db=None, target_db=None):
    """
    Store the DB to read from and the DB to write to.

    source_db: DbDriver or None - DB opened for reading only; when
        `None`, a read-only driver over `RESCRIBED_DATA_DIRECTORY`
        is created
    target_db: DbDriver or None - DB opened for writing; when `None`,
        a driver over `PREPROCESSED_DATA_DIRECTORY` is created

    Raises `TypeError` if an argument is neither `None` nor a `DbDriver`
    and `RuntimeError` if a driver has the wrong access mode.
    """
    wrong_type_message = "Please pass an instance of `DbDriver` or `None`."

    # source db - must be a read-only driver
    source = (
        DbDriver(RESCRIBED_DATA_DIRECTORY, read_only=True)
        if source_db is None else source_db
    )
    if not isinstance(source, DbDriver):
        raise TypeError(wrong_type_message)
    if not source.read_only:
        raise RuntimeError(
            "Please pass `DbDriver` for read only or `None`.")
    self.source_db = source

    # target db - must be a writable driver
    target = (
        DbDriver(PREPROCESSED_DATA_DIRECTORY)
        if target_db is None else target_db
    )
    if not isinstance(target, DbDriver):
        raise TypeError(wrong_type_message)
    if target.read_only:
        raise RuntimeError("Please pass `DbDriver` for writing or `None`.")
    self.target_db = target
def test_read_only(self):
    """
    Unit test of `DbDriver.read_only`.

    The attribute must follow the private `__read_only` flag and must
    not be callable.
    """
    # bare instance - `__init__` is bypassed on purpose
    driver = DbDriver.__new__(DbDriver)

    # the public attribute follows the private flag
    checks = ((True, self.assertTrue), (False, self.assertFalse))
    for flag, check in checks:
        driver._DbDriver__read_only = flag
        check(driver.read_only)

    # it yields a plain value, so calling the result raises `TypeError`
    with self.assertRaises(TypeError):
        driver.read_only()
def test_get_item(self):
    """
    Unit test of item access on `DbDriver` for an already loaded table.

    An unknown name must raise `KeyError`; a known, loaded table must be
    returned as is, without calling `_load_table`.
    """
    # arrange - bare instance with a single loaded table
    driver = DbDriver.__new__(DbDriver)
    loaded_table = MagicMock()
    driver._DbDriver__tables = {"MyTable": loaded_table}
    driver._load_table = MagicMock()

    # unknown table name
    with self.assertRaises(KeyError):
        driver["NotExistingTable"]

    # known table - returned directly
    fetched = driver["MyTable"]
    driver._load_table.assert_not_called()
    self.assertIs(fetched, loaded_table)
def test_create_table(self):
    """
    Unit test of `DbDriver.create_table`.

    The new table (an instance of the patched `Table` class) must be
    registered under its name, and that name must be removed from the
    list of dropped tables.
    """
    # arrange
    table_name = "labada"
    created_table = MagicMock()
    table_class = MagicMock(return_value=created_table)

    driver = MagicMock()
    driver._DbDriver__read_only = False
    driver._DbDriver__tables = {}
    driver._DbDriver__dropped_tables = [table_name, "other"]

    # act - run the real method against the mocked instance
    with patch("pkwscraper.lib.dbdriver.Table", table_class):
        DbDriver.create_table(driver, table_name)

    # assert
    self.assertDictEqual(
        driver._DbDriver__tables, {table_name: created_table})
    self.assertListEqual(driver._DbDriver__dropped_tables, ["other"])
def test_dump_tables(self):
    """
    Unit test of `DbDriver.dump_tables`.

    Expected behaviour, as pinned by the assertions: files of dropped
    tables are removed only when they exist, the loaded table is
    written to CSV with `;` as separator, and the list of dropped
    tables ends up empty.
    """
    # arrange - paths handed out by the mocked `_filepath`, in call order
    old_path = "./here/old_table.csv"
    missing_path = "./here/missing_table.csv"
    new_path = "./here/new_table.csv"

    frame = MagicMock()
    table = MagicMock()
    table.to_df.return_value = frame

    driver = MagicMock()
    driver._DbDriver__read_only = False
    driver._DbDriver__tables = {
        "new_table": table,
        "not_changed_table": None,
    }
    driver._DbDriver__dropped_tables = ["old_table", "missing_table"]
    driver._filepath.side_effect = [old_path, missing_path, new_path]

    # only the first dropped table has a file on disk
    os_mock = MagicMock()
    os_mock.path.exists.side_effect = [True, False]

    # act
    with patch("pkwscraper.lib.dbdriver.os", os_mock):
        DbDriver.dump_tables(driver)

    # assert
    driver._filepath.assert_has_calls(
        [call("old_table"), call("missing_table"), call("new_table")])
    os_mock.path.exists.assert_has_calls(
        [call(old_path), call(missing_path)])
    os_mock.remove.assert_called_once_with(old_path)
    frame.to_csv.assert_called_once_with(new_path, sep=";")
    self.assertListEqual(driver._DbDriver__dropped_tables, [])
def test_get_item_not_loaded(self):
    """
    Unit test of item access on `DbDriver` for a table stored as `None`.

    The table must be obtained through `_load_table`, called once with
    the table name, and the value it returns must be handed back.
    """
    # arrange - bare instance whose only table is registered as `None`
    driver = DbDriver.__new__(DbDriver)
    loaded_table = MagicMock()
    driver._DbDriver__tables = {"MyTable": None}
    driver._load_table = MagicMock(return_value=loaded_table)

    # act
    fetched = driver["MyTable"]

    # assert
    driver._load_table.assert_called_once_with("MyTable")
    self.assertIs(fetched, loaded_table)
def test_delete_table(self):
    """
    Unit test of `DbDriver.delete_table`.

    The table must disappear from the tables mapping and its name must
    be appended to the list of dropped tables; other tables stay.
    """
    # arrange
    dropped_name = "labada"
    dropped_table = MagicMock()
    kept_table = MagicMock()

    driver = MagicMock()
    driver._DbDriver__read_only = False
    driver._DbDriver__tables = {
        dropped_name: dropped_table,
        "other": kept_table,
    }
    driver._DbDriver__dropped_tables = []

    # act - run the real method against the mocked instance
    DbDriver.delete_table(driver, dropped_name)

    # assert
    self.assertDictEqual(driver._DbDriver__tables, {"other": kept_table})
    self.assertListEqual(driver._DbDriver__dropped_tables, [dropped_name])
def test_filepath(self):
    """
    Unit test of `DbDriver._filepath`.

    The result must be the DB directory joined with the table name plus
    the `.csv` extension; both the backslash and the slash separator
    are accepted.
    """
    # arrange
    driver = MagicMock()
    driver.db_directory = "./some_directory"
    table_name = "table_name"

    # either path separator is fine
    accepted_paths = [
        "./some_directory\\table_name.csv",
        "./some_directory/table_name.csv"
    ]

    # act
    path = DbDriver._filepath(driver, table_name)

    # assert
    self.assertIn(path, accepted_paths)
def test_init_exists(self):
    """
    Check opening an existing DB directory for reading only.

    Right after construction both tables must be registered as `None`;
    after each one is accessed, its data must match the synthetic
    records.
    """
    # records expected in the synthetic tables, keyed by record ID
    expected_first = {
        101: {'num': 9, 'char': 'a'},
        102: {'num': 16, 'char': 'b'},
        103: {'num': 25, 'char': 'c'},
    }
    expected_second = {
        0: {'num': 36, 'char': 'd'},
        1: {'num': 49, 'char': 'e'},
        2: {'num': 64, 'char': 'f'},
    }

    # create some synthetic tables data
    self._make_synthetic_data()

    # tables are known by name only at first
    driver = DbDriver(db_directory=self.directory, read_only=True)
    self.assertEqual(len(driver._DbDriver__tables), 2)
    self.assertDictEqual(
        driver._DbDriver__tables,
        {"first_table": None, "second_table": None})

    # item access is done only for its effect on the private mapping
    driver["first_table"]
    driver["second_table"]

    tables = driver._DbDriver__tables
    self.assertDictEqual(
        tables["first_table"]._Table__data, expected_first)
    self.assertDictEqual(
        tables["second_table"]._Table__data, expected_second)

    # clean up
    self._clean_synthetic_data()
def __init__(self, db=None):
    """
    Create the downloader and store the DB to write rescribed data to.

    db: DbDriver or None - DB opened for writing; when `None`, a driver
        over `RESCRIBED_DATA_DIRECTORY` is created

    Raises `TypeError` if `db` is neither `None` nor a `DbDriver` and
    `RuntimeError` if the driver is read only.
    """
    # downloader for year 2015, working in the raw data directory
    self.dl = Downloader(year=2015, directory=RAW_DATA_DIRECTORY)

    # db for rescribing - must be a writable driver
    target = DbDriver(RESCRIBED_DATA_DIRECTORY) if db is None else db
    if not isinstance(target, DbDriver):
        raise TypeError("Please pass an instance of `DbDriver` or `None`.")
    if target.read_only:
        raise RuntimeError(
            "Please pass `DbDriver` for writing or `None`.")
    self.db = target

    # counter used for checking
    self.all_votes = 0
def test_load_table(self):
    """
    Unit test of `DbDriver._load_table`.

    With `Table` and `os.path.getsize` patched, the method must resolve
    the file path once, load it with `_load_csv` only, query the file
    size once, build the table with `Table.from_df`, register it under
    its name and return it.
    """
    # arrange
    table_name = "labada"
    csv_path = os.path.join(self.directory, "labada.csv")
    frame = MagicMock()

    driver = MagicMock()
    driver.db_directory = self.directory
    driver.limit = None
    driver._DbDriver__read_only = True
    driver._DbDriver__tables = {}
    driver._load_csv.return_value = frame
    driver._filepath.return_value = csv_path

    built_table = MagicMock()
    table_class = MagicMock()
    table_class.from_df.return_value = built_table
    getsize_mock = MagicMock(return_value=1000)

    # act - both patches are active during the call
    with patch("pkwscraper.lib.dbdriver.Table", table_class), \
            patch("pkwscraper.lib.dbdriver.os.path.getsize", getsize_mock):
        returned = DbDriver._load_table(driver, table_name)

    # assert
    driver._filepath.assert_called_once_with(table_name)
    driver._load_excel.assert_not_called()
    driver._load_csv.assert_called_once_with(csv_path)
    getsize_mock.assert_called_once_with(csv_path)
    table_class.from_df.assert_called_once_with(
        frame, limit=None, read_only=True)
    self.assertDictEqual(driver._DbDriver__tables, {"labada": built_table})
    self.assertIs(returned, built_table)
def main():
    """
    Show all territorial units from the DB, each in a random colour.

    Regions are built from the `geo` field of every record in the
    "gminy", "powiaty", "województwa" and "okręgi" tables and passed to
    a `Visualizer` together with a dummy value (0) per region.
    """
    # open DB
    db = DbDriver(SEJM_2015_DATA_DIRECTORY, read_only=True)

    # choose units to visualize
    tables = ["gminy", "powiaty", "województwa", "okręgi"]
    regions = []
    for table_name in tables:
        geos = db[table_name].find({}, fields="geo")
        regions.extend(Region.from_json(geo) for geo in geos)

    # every region gets the same dummy value - colours ignore it anyway
    values = [0] * len(regions)

    def colormap(x):
        # three random components plus a fixed 0.4 - presumably RGB and
        # alpha, TODO confirm against `Visualizer`; `x` is ignored
        return [random.random() for _ in range(3)] + [0.4]

    # make visualizer
    vis = Visualizer(regions, values, colormap)
    vis.render_colors()
    vis.prepare()
    vis.show()
def test_init_nested_directory(self):
    """
    Check that `DbDriver` works with a DB directory nested several
    levels below a directory that does not exist yet.

    After creating and dumping one table, the nested directory and the
    table's CSV file must exist.
    """
    nested_dir = self.directory + "other/directory/level/"

    # prepare DB with a single dumped table
    driver = DbDriver(db_directory=nested_dir)
    driver.create_table("test")
    driver.dump_tables()

    # assert
    self.assertEqual(driver.db_directory, nested_dir)
    for expected_path in (nested_dir, nested_dir + "test.csv"):
        self.assertTrue(os.path.exists(expected_path))

    # clean up - remove the whole tree and verify it is gone
    shutil.rmtree(self.directory)
    self.assertFalse(os.path.exists(nested_dir))
def main():
    """
    Print the number of votes and mandates obtained by the candidates
    of each party, sorted by votes in descending order.

    Candidates whose party field starts with "nie należy do partii
    politycznej" are grouped under `None` and printed as
    "bezpartyjne/i". Votes are summed over all `wyniki_<number>` tables,
    one per record of the "okręgi" table.
    """
    # open DB
    db = DbDriver(SEJM_2015_DATA_DIRECTORY, read_only=True)

    # get party membership/support for each candidate not crossed out
    candidates = db["kandydaci"].find(
        {"is_crossed_out": False}, fields=["_id", "party"])

    # candidates declaring no party membership share the `None` key
    no_party_prefix = "nie należy do partii politycznej"
    for candidate in candidates:
        if candidate[1].startswith(no_party_prefix):
            candidate[1] = None
    candidate_to_party = {
        candidate_id: party_name for candidate_id, party_name in candidates
    }

    # prepare data dicts
    party_list = list(set(party_name for _, party_name in candidates))
    party_results = {
        party_name: {"mandates": 0, "votes": 0} for party_name in party_list
    }

    # mandate winners
    for winner_id in db["mandaty"].find({}, fields="candidate"):
        winner_party = candidate_to_party[winner_id]
        party_results[winner_party]["mandates"] += 1

    # fields of a result record that are not candidate IDs
    non_candidate_fields = ("_id", "obwod", "candidates_count")

    # iterate over constituencies - each has its own results table
    constituency_nos = db["okręgi"].find({}, fields="number")
    table_names = [f"wyniki_{const_no}" for const_no in constituency_nos]
    for table_name in table_names:
        voting_results = db[table_name].find({})
        for record in voting_results.values():
            for field, value in record.items():
                if field in non_candidate_fields:
                    continue
                voted_party = candidate_to_party[field]
                votes_count = int(value)
                party_results[voted_party]["votes"] += votes_count

    # sort results by votes, biggest first
    def votes_of(party_name):
        return party_results[party_name]["votes"]

    sorted_parties = sorted(party_results, key=votes_of, reverse=True)

    # column widths for presenting results
    longest_name_length = max(map(len, filter(None, party_list)))
    most_votes = max(totals["votes"] for totals in party_results.values())
    most_mandates = max(
        totals["mandates"] for totals in party_results.values())
    first_spaces = longest_name_length + len(str(most_votes)) + 4
    second_spaces = len(str(most_mandates)) + 3

    print("Kandydatki i kandydaci następujących partii uzyskały:")
    print()
    for party_name in sorted_parties:
        totals = party_results[party_name]
        mandates = totals["mandates"]
        votes = totals["votes"]

        # party names are quoted, the no-party group gets a fixed label
        label = "bezpartyjne/i" if party_name is None else f'"{party_name}"'

        # pad so that the numbers line up on the right
        spacing_1 = " " * (first_spaces - len(label) - len(str(votes)))
        spacing_2 = " " * (second_spaces - len(str(mandates)))
        print(f'{label}:{spacing_1}{votes} głosów'
              f'{spacing_2}{mandates} mandatów')
def test_whole(self):
    """
    Main integration test.

    Creates a DB with one table and two records, dumps it, opens the
    directory again with a second driver, checks that both records are
    found and finally deletes the whole DB.
    """
    # arrange
    record_1 = {"a": 1, "b": 2}
    record_2 = {"c": 8, "b": 4}

    # make and save db
    db = DbDriver(db_directory=self.directory)
    db.create_table("my_table")
    id_1 = db["my_table"].put(record_1)
    id_2 = db["my_table"].put(record_2)
    db.dump_tables()

    # open the db again
    db2 = DbDriver(db_directory=self.directory)
    my_table = db2["my_table"]
    records = my_table.find({})
    self.assertEqual(len(records), 2)
    self.assertDictEqual(records[id_1], record_1)
    self.assertDictEqual(records[id_2], record_2)

    # remove db - characters 43..52 of the returned text are passed back
    # as the access code (presumably the code is embedded in a longer
    # message; verify against `get_deleting_access`)
    deleting_access_code = db2.get_deleting_access()
    deleting_access_code = deleting_access_code[43:53]
    db2.delete(deleting_access_code)

    # a unittest assertion instead of a bare `assert`, which is skipped
    # when Python runs with `-O`
    self.assertFalse(os.path.exists(self.directory))
def test_delete(self):
    """
    Check that `DbDriver.delete` removes the whole DB directory, also
    when a table was created, dumped and dropped before.
    """
    # arrange - existing data plus one table dumped and then dropped
    self._make_synthetic_data()
    self.assertTrue(os.path.exists(self.directory))

    driver = DbDriver(db_directory=self.directory)
    driver.create_table("foo")
    driver.dump_tables()
    driver.delete_table("foo")

    # act - characters 43..52 of the returned text are passed back as
    # the access code
    access_text = driver.get_deleting_access()
    access_code = access_text[43:53]
    driver.delete(access_code)

    # assert
    self.assertFalse(os.path.exists(self.directory))
def _split_db(self):
    """
    Split the source DB into one small DB per unit of analysis.

    This is a generator: for each unit it yields a `DbDriver` instance
    holding only the records related to that unit, so that the function
    passed by the user can use everything in the instance it gets and
    be sure the data are isolated from other units. Yielded instances
    are marked as read only.
    """
    # prepare indexes
    db_refs = DbReferences(self.source_db, self.granularity)

    # prepare units list
    if self.unit_granularity is not None:
        # bare lookup, done only to check the unit is correctly set
        # (presumably raises for an unknown table or ID - TODO confirm)
        self.source_db[self.unit_granularity][self.unit_id]
        units = db_refs.get_relation(
            _from=self.unit_granularity,
            _to=self.granularity,
            _id=self.unit_id,
        )
    else:
        units = self.source_db[self.granularity].find({})

    # make DB driver instance for each unit
    for unit_id in units:
        # IDs of the records to copy, per table
        tables_and_ids = {
            "gminy": db_refs.get_gmina(unit_id),
            "powiaty": db_refs.get_powiat(unit_id),
            "okręgi": db_refs.get_okreg(unit_id),
            "województwa": db_refs.get_voivodship(unit_id),
            "obwody": db_refs.get_obwod(unit_id),
            "protokoły": db_refs.get_protocole(unit_id),
            "listy": db_refs.get_list(unit_id),
            "kandydaci": db_refs.get_candidate(unit_id),
            "mandaty": db_refs.get_mandate(unit_id),
        }
        # results come as a mapping of their own, merged in as is
        tables_and_ids.update(db_refs.get_wyniki(unit_id))

        # create a writable driver without running `__init__`
        unit_db = DbDriver.__new__(DbDriver)
        unit_db._DbDriver__read_only = False
        unit_db._DbDriver__tables = {}
        unit_db._DbDriver__dropped_tables = []

        # copy records, keeping their original IDs
        for table_name, record_ids in tables_and_ids.items():
            unit_db.create_table(table_name)
            for record_id in record_ids:
                original = self.source_db[table_name][record_id]
                unit_db[table_name].put(dict(original), _id=record_id)

        # freeze db and conclude iteration
        unit_db._DbDriver__read_only = True
        yield unit_db
def test_read_only_errors(self):
    """
    Check that every modifying operation on a DB opened for reading
    only raises `IOError` with the expected message.
    """
    # arrange
    self._make_synthetic_data()
    db = DbDriver(self.directory, read_only=True)

    db_message = "DB is for reading only."
    table_message = "Table is for read only."

    # (operation, expected error message), checked in this order
    forbidden_operations = [
        # saving to harddrive
        (lambda: db.dump_tables(), db_message),
        # adding table
        (lambda: db.create_table("test_table"), db_message),
        # putting records
        (lambda: db["first_table"].put({"c": 5}), table_message),
        # dropping table
        (lambda: db.delete_table("first_table"), db_message),
        # obtaining deleting access code
        (lambda: db.get_deleting_access(), db_message),
        # deleting db
        (lambda: db.delete(access_code="something"), db_message),
    ]

    for operation, expected_message in forbidden_operations:
        with self.assertRaises(IOError) as caught:
            operation()
        # checked after the `with` block, once the exception is captured
        self.assertEqual(caught.exception.args[0], expected_message)

    # clean up
    self._clean_synthetic_data()