def _parse_speeches(self, root):
        """Yield one flat row per speech found under ``root``.

        Walks every child of ``root`` as a session, every child of the
        session's ``fasesSessao`` element as a phase, and every child of the
        phase's ``discursos`` element as a speech.

        Each yielded row is a list with, in order: session code, session
        date, session number, phase code, phase description, speaker number,
        speaker name, speaker party, speaker state, speech start datetime,
        room number and insertion number.

        A speech whose start time raises ``ValueError`` while being parsed is
        reported on stdout and skipped. A session without ``fasesSessao`` or
        a phase without ``discursos`` contributes no rows instead of aborting
        the whole iteration.
        """
        for session in root:
            session_code = xml_extract_text(session, 'codigo')
            session_date = xml_extract_date(session, 'data')
            session_num = xml_extract_text(session, 'numero')

            # Assuming ElementTree-style semantics, find() returns None when
            # the child is absent; iterating None would raise TypeError, so a
            # missing container is treated as an empty one.
            phases = session.find('fasesSessao')
            if phases is None:
                continue

            for phase in phases:
                phase_code = xml_extract_text(phase, 'codigo')
                phase_desc = xml_extract_text(phase, 'descricao')

                speeches = phase.find('discursos')
                if speeches is None:
                    continue

                for speech in speeches:
                    speech_speaker_num = xml_extract_text(speech, 'orador/numero')
                    speech_speaker_name = xml_extract_text(speech, 'orador/nome')
                    speech_speaker_party = xml_extract_text(speech, 'orador/partido')
                    speech_speaker_state = xml_extract_text(speech, 'orador/uf')

                    try:
                        speech_started_at = xml_extract_datetime(speech, 'horaInicioDiscurso')
                    except ValueError as ve:
                        # Best effort: one bad timestamp must not stop the
                        # remaining speeches from being emitted.
                        print("WARNING: Error parsing speech start time for {} - {}/{} on {}\n{}".format(
                            speech_speaker_name,
                            speech_speaker_party,
                            speech_speaker_state,
                            session_date,
                            ve))
                        continue

                    speech_room_num = xml_extract_text(speech, 'numeroQuarto')
                    speech_insertion_num = xml_extract_text(speech, 'numeroInsercao')

                    yield [
                        session_code,
                        session_date,
                        session_num,
                        phase_code,
                        phase_desc,
                        speech_speaker_num,
                        speech_speaker_name,
                        speech_speaker_party,
                        speech_speaker_state,
                        speech_started_at,
                        speech_room_num,
                        speech_insertion_num,
                    ]
Example #2
0
    def test_extract_date_supports_custom_format(self):
        """An explicit format argument is used to parse the 'usDate' element."""
        custom_format = '%m/%d/%Y'
        expected = datetime(2017, 4, 30, 0, 0)

        extracted = helpers.xml_extract_date(
            self.sampleXml, 'usDate', custom_format)

        self.assertEqual(expected, extracted)
Example #3
0
    def test_extract_date_default_to_br_format(self):
        """With no format argument, the 'brDate' element parses to 2017-05-31.

        NOTE(review): the default is presumably day/month/year, as the test
        name suggests -- confirm against the helper's definition.
        """
        expected = datetime(2017, 5, 31, 0, 0)

        extracted = helpers.xml_extract_date(self.sampleXml, 'brDate')

        self.assertEqual(expected, extracted)