def _preprocess_gminy(self):
    """Populate the target "gminy" (commune) table from the source data.

    Every record of the source "gminy" table is combined with:

    * its parent district, looked up in the target "powiaty" table by the
      code returned from ``get_parent_code``;
    * its full name, taken from the ``commune_code`` / ``commune_name``
      fields of the source "obwody" table.

    The resulting record (code, merged name, urban/rural kind, parsed geo
    data and parent district id) is stored in the target "gminy" table.

    Raises:
        ValueError: if the source "obwody" table provides no full name for
            a commune code.
    """
    communes = self.source_db["gminy"].find({})
    self.target_db.create_table("gminy")

    # Map commune code -> full commune name as recorded in "obwody".
    code_to_name_dict = {
        code: name
        for code, name in self.source_db["obwody"].find(
            {}, fields=["commune_code", "commune_name"])
    }

    for c in communes.values():
        code = c["code"]
        partial_name = c["partial_name"]
        geo = c.get("geo", "")

        district_code = get_parent_code(code)
        # Only the id of the parent district is needed for the new record.
        district_id, _ = self.target_db["powiaty"].find_one(
            {"code": district_code})

        # Use .get() so that a code absent from "obwody" reaches the
        # descriptive ValueError below instead of a bare KeyError.
        full_name = code_to_name_dict.get(code)
        if full_name is None:
            raise ValueError(
                f"Cannot find commune '{partial_name}' with code {code}.")

        urban_or_rural = self.urban_or_rural(full_name, code)
        merged_name = self.merge_commune_names(partial_name, full_name,
                                               code)

        self.target_db["gminy"].put({
            "code": code,
            "name": merged_name,
            "urban_or_rural": urban_or_rural,
            "geo": self._parse_geo(geo),
            "parent": district_id,
        })
def test_integer_codes(self):
    """Check get_parent_code on integer input for every pair in self.data.

    Both the returned value and its exact type must match the expected
    integer parent code.
    """
    for raw_code, raw_parent in self.data:
        code, parent = int(raw_code), int(raw_parent)
        actual = get_parent_code(code)
        self.assertEqual(actual, parent)
        self.assertIs(type(actual), type(parent))
def test_string_codes(self):
    """Check get_parent_code on string input for every pair in self.data.

    Both the returned value and its exact type must match the expected
    string parent code.
    """
    for raw_code, raw_parent in self.data:
        code, parent = str(raw_code), str(raw_parent)
        actual = get_parent_code(code)
        self.assertEqual(actual, parent)
        self.assertIs(type(actual), type(parent))
def urban_or_rural(commune_full_name, commune_code):
    """Classify a commune as "urban", "rural", "marine" or "abroad".

    Communes whose parent district code is 146500 are always "urban",
    whatever their name. All others are classified by the prefix of their
    full name: "gm. " -> rural, "m. " -> urban, "Statki " -> marine,
    "Zagranica" -> abroad.

    Args:
        commune_full_name: full commune name, including its prefix.
        commune_code: commune code accepted by ``get_parent_code``.

    Returns:
        One of "urban", "rural", "marine", "abroad".

    Raises:
        ValueError: if the name matches none of the known prefixes (or if
            ``get_parent_code`` rejects the code).
    """
    district_code = get_parent_code(commune_code)
    # get_parent_code is exercised with both int and str codes and returns
    # the type it was given; normalise to int so that a string code such as
    # "146500" also triggers the special case instead of never matching.
    if int(district_code) == 146500:
        return "urban"

    # Checked in order; the first matching prefix decides the kind.
    kinds_by_prefix = (
        ("gm. ", "rural"),
        ("m. ", "urban"),
        ("Statki ", "marine"),
        ("Zagranica", "abroad"),
    )
    for prefix, kind in kinds_by_prefix:
        if commune_full_name.startswith(prefix):
            return kind

    raise ValueError(f"Cannot determine `urban_or_rural` "
                     f"value from name: {commune_full_name}.")
def _preprocess_powiaty(self):
    """Populate the target "powiaty" (district) table and link constituencies.

    Each source district is stored in the target "powiaty" table with its
    parent voivodship id (looked up in the target "województwa" table).
    Afterwards every record of the target "okręgi" (constituency) table
    receives a "powiat_list" field: a JSON-encoded list of the ids of the
    districts assigned to that constituency.
    """
    source_districts = self.source_db["powiaty"].find({})
    self.target_db.create_table("powiaty")

    constituency_numbers = self.target_db["okręgi"].find(query={},
                                                         fields="number")
    # One independent list of district ids per known constituency number.
    district_ids_by_constituency = {
        number: [] for number in constituency_numbers
    }

    for record in source_districts.values():
        constituency_number = record["constituency_number"]
        # Source codes are multiplied by 100 before being stored and passed
        # to get_parent_code (presumably to reach the six-digit form used
        # elsewhere -- confirm against the source data).
        code = record["code"] * 100
        name = record["name"]
        geo = record.get("geo", "")

        # NOTE: an earlier override that remapped code 146500 to 146501 at
        # this point is disabled.

        voivod_code = get_parent_code(code)
        voivod_id, _ = self.target_db["województwa"].find_one(
            {"code": voivod_code})

        district_id = self.target_db["powiaty"].put({
            "code": code,
            "name": name,
            "geo": self._parse_geo(geo),
            "parent": voivod_id,
        })
        district_ids_by_constituency[constituency_number].append(district_id)

    # Attach the collected district ids to their constituency records.
    okregi = district_ids_by_constituency.items()
    for number, district_ids in okregi:
        serialized_ids = json.dumps(district_ids)
        constituency_id, constituency = self.target_db["okręgi"].find_one(
            {"number": number})
        constituency["powiat_list"] = serialized_ids
        # Write the record back under its existing id.
        self.target_db["okręgi"].put(constituency, _id=constituency_id)
def test_too_long_number(self):
    """A seven-digit integer code must be rejected with ValueError."""
    seven_digit_code = 2205142
    self.assertRaises(ValueError, get_parent_code, seven_digit_code)
def test_too_short_number(self):
    """A four-digit integer code must be rejected with ValueError."""
    four_digit_code = 2058
    self.assertRaises(ValueError, get_parent_code, four_digit_code)
def test_voivodship_parent(self):
    """Code 280000 must be rejected with ValueError.

    Presumably this is a voivodship-level code, which has no parent --
    confirm against get_parent_code.
    """
    voivodship_code = 280000
    self.assertRaises(ValueError, get_parent_code, voivodship_code)
def test_odd_voivodship_number(self):
    """Code 130612, whose leading two digits are odd, must raise ValueError.

    Presumably valid voivodship prefixes are even -- confirm against
    get_parent_code.
    """
    odd_prefix_code = 130612
    self.assertRaises(ValueError, get_parent_code, odd_prefix_code)
def test_over_voivodships_range(self):
    """Code 346102 must be rejected with ValueError.

    Presumably its leading two digits (34) lie above the highest valid
    voivodship prefix -- confirm against get_parent_code.
    """
    out_of_range_code = 346102
    self.assertRaises(ValueError, get_parent_code, out_of_range_code)
def test_wrong_format(self):
    """A bytes code must be rejected with TypeError."""
    bytes_code = b"080312"
    self.assertRaises(TypeError, get_parent_code, bytes_code)
def test_empty_input(self):
    """An empty string must be rejected with ValueError."""
    empty_code = ""
    self.assertRaises(ValueError, get_parent_code, empty_code)