def parse_mp_data(data: List[List[str]], get_english: bool = False) -> pd.DataFrame:
    """Parse MP data from XMLs to a pandas DataFrame table.

    Each row is parsed from either its Finnish or its English XML. An English
    translation is usually available only for the current MPs, so enabling
    ``get_english`` typically results in a table with both English and Finnish
    entries.

    Args:
        data (List[List[str]]): MPQuery results. Each row starts with the MP id,
            last name and first name; index 7 holds the XML parsed by default
            and index 8 the XML tried when ``get_english`` is set.
        get_english (bool): parse English XML if available, defaults to False.

    Returns:
        pd.DataFrame: parsed MP data indexed by ``mp_id``. When ``data`` yields
            no rows, an empty frame with an empty ``mp_id`` index is returned.
    """
    mps = []
    for item in data:
        mpid, lastname, firstname = item[0:3]
        # Use the English XML only when requested and when contains_empty_tag
        # does not flag its "Ammatti" tag (presumably a sign that the
        # translation is missing); otherwise parse the default XML.
        use_english = get_english and not contains_empty_tag("Ammatti", item[8])
        xml = etree.fromstring(item[8] if use_english else item[7])
        mps.append(MPInfo(xml).parse(int(mpid), firstname.strip(), lastname.strip()))

    if not mps:
        # A frame built from an empty list has no "mp_id" column, so set_index
        # would raise KeyError; return an empty, correctly indexed frame instead.
        return pd.DataFrame(index=pd.Index([], name="mp_id"))
    return pd.DataFrame(mps).set_index("mp_id")
def test_get_district_date_range(mpinfo: MPInfo, true_district_dates: List[str]) -> None:
    """Check the electoral district date ranges parsed from the XML against the expected list."""
    district_nodes = mpinfo.xml.xpath("./Vaalipiirit/*[contains(name(),'Vaalipiiri')]")
    # Flatten the per-district results into one list, keeping document order.
    parsed_dates = []
    for node in district_nodes:
        parsed_dates.extend(mpinfo.get_district_date_range(node))
    assert parsed_dates == true_district_dates
def test_mpinfo_parse(
    mpinfo: MPInfo, mpid: int, firstname: str, lastname: str, true_mp: MP
) -> None:
    """Check that parsing the XML with the given id and names yields the expected MP object."""
    assert mpinfo.parse(mpid, firstname, lastname) == true_mp
def test_get_education(mpinfo: MPInfo, true_education: str) -> None:
    """Check that the education parsed from the XML equals the expected value."""
    assert mpinfo.get_education() == true_education
def test_get_districts(mpinfo: MPInfo, true_districts: str) -> None:
    """Check that the electoral districts parsed from the XML equal the expected value."""
    assert mpinfo.get_districts() == true_districts
def test_get_pob(mpinfo: MPInfo, true_pob: str) -> None:
    """Check that the place of birth parsed from the XML equals the expected value."""
    assert mpinfo.get_pob() == true_pob
def test_get_city(mpinfo: MPInfo, true_city: str) -> None:
    """Check that the current home city parsed from the XML equals the expected value."""
    assert mpinfo.get_city() == true_city
def test_get_profession(mpinfo: MPInfo, true_profession: str) -> None:
    """Check that the profession parsed from the XML equals the expected value."""
    assert mpinfo.get_profession() == true_profession
def test_get_party(mpinfo: MPInfo, true_party: str) -> None:
    """Check that the party (/parliamentary group) parsed from the XML equals the expected value."""
    assert mpinfo.get_party() == true_party
def test_get_birthyear(mpinfo: MPInfo, true_birthyear: int) -> None:
    """Check that the birth year parsed from the XML equals the expected value."""
    assert mpinfo.get_birthyear() == true_birthyear
def test_get_language(mpinfo: MPInfo, true_language: str) -> None:
    """Check that the language parsed from the XML equals the expected value."""
    assert mpinfo.get_language() == true_language
def test_get_gender(mpinfo: MPInfo, true_gender: str) -> None:
    """Check that the gender parsed from the XML equals the expected value."""
    assert mpinfo.get_gender() == true_gender
def mpinfo(request: SubRequest) -> MPInfo:
    """Build an MPInfo object from the XML file whose path is given in ``request.param``."""
    # newline="" hands back the file's line endings untranslated.
    with open(request.param, encoding="utf-8", newline="") as xml_file:
        raw_xml = xml_file.read()
    return MPInfo(etree.fromstring(raw_xml))