def _all_start_times(self, pivot, session_dates):
    """Yield the start time of every session found for the given dates.

    For each date, ``self.URL`` is formatted with the date (rendered as
    ``dd/mm/YYYY``) and ``pivot``, the resulting document is downloaded
    and parsed as XML, and one tuple is produced per ``sessaoDia``
    element found anywhere in it.

    Args:
        pivot: Value substituted into the second placeholder of
            ``self.URL``.
        session_dates: Iterable of objects exposing ``strftime``.

    Yields:
        ``(date, description, start)`` tuples, where ``description`` is
        read from the ``descricao`` child and ``start`` from the
        ``inicio`` child of each ``sessaoDia`` element.
    """
    for date in session_dates:
        formatted_date = date.strftime("%d/%m/%Y")
        log.debug(formatted_date)
        url = self.URL.format(formatted_date, pivot)
        # Parse the whole response and close it *before* yielding, so no
        # connection stays open while the generator is suspended (or is
        # leaked if the consumer abandons the generator early).
        with urllib.request.urlopen(url) as response:
            tree = ET.ElementTree(file=response)
        for session in tree.getroot().findall('.//sessaoDia'):
            yield (
                date,
                xml_extract_text(session, 'descricao'),
                xml_extract_datetime(session, 'inicio'),
            )
def __all_start_times(self, pivot, session_dates):
    """Yield the start time of every session found for the given dates.

    For each date, ``self.URL`` is formatted with the date (rendered as
    ``dd/mm/YYYY``) and ``pivot``, the resulting document is downloaded
    and parsed as XML, and one tuple is produced per ``sessaoDia``
    element found anywhere in it. When the ``DEBUG`` environment
    variable equals ``'1'``, each formatted date is printed before it is
    fetched.

    Args:
        pivot: Value substituted into the second placeholder of
            ``self.URL``.
        session_dates: Iterable of objects exposing ``strftime``.

    Yields:
        ``(date, description, start)`` tuples, where ``description`` is
        read from the ``descricao`` child and ``start`` from the
        ``inicio`` child of each ``sessaoDia`` element.
    """
    for date in session_dates:
        formatted_date = date.strftime("%d/%m/%Y")
        if os.environ.get('DEBUG') == '1':
            print(formatted_date)
        url = self.URL.format(formatted_date, pivot)
        # Parse the whole response and close it *before* yielding, so no
        # connection stays open while the generator is suspended (or is
        # leaked if the consumer abandons the generator early).
        with urllib.request.urlopen(url) as response:
            tree = ET.ElementTree(file=response)
        for session in tree.getroot().findall('.//sessaoDia'):
            yield (
                date,
                xml_extract_text(session, 'descricao'),
                xml_extract_datetime(session, 'inicio'),
            )
def _parse_speeches(self, root):
    """Yield one flat row per speech found under ``root``.

    ``root`` is iterated as a sequence of session elements. Each session
    may hold a ``fasesSessao`` container of phases, and each phase may
    hold a ``discursos`` container of speeches. A missing container is
    treated as empty rather than raising ``TypeError``.

    A speech whose ``horaInicioDiscurso`` cannot be parsed (the helper
    raises ``ValueError``) is reported on standard output and skipped.

    Args:
        root: Iterable XML element whose children are sessions.

    Yields:
        A 12-item list: session code, session date, session number,
        phase code, phase description, speaker number, speaker name,
        speaker party, speaker state, speech start, room number
        (``numeroQuarto``) and insertion number (``numeroInsercao``).
    """
    for session in root:
        session_code = xml_extract_text(session, 'codigo')
        session_date = xml_extract_date(session, 'data')
        session_num = xml_extract_text(session, 'numero')

        phases = session.find('fasesSessao')
        if phases is None:
            # find() returns None when the element is absent; nothing to
            # iterate for this session.
            continue

        for phase in phases:
            phase_code = xml_extract_text(phase, 'codigo')
            phase_desc = xml_extract_text(phase, 'descricao')

            speeches = phase.find('discursos')
            if speeches is None:
                continue

            for speech in speeches:
                speech_speaker_num = xml_extract_text(speech, 'orador/numero')
                speech_speaker_name = xml_extract_text(speech, 'orador/nome')
                speech_speaker_party = xml_extract_text(
                    speech, 'orador/partido')
                speech_speaker_state = xml_extract_text(speech, 'orador/uf')
                try:
                    speech_started_at = xml_extract_datetime(
                        speech, 'horaInicioDiscurso')
                except ValueError as ve:
                    print("WARNING: Error parsing speech start time for {} - {}/{} on {}\n{}".format(
                        speech_speaker_name,
                        speech_speaker_party,
                        speech_speaker_state,
                        session_date,
                        ve))
                    continue
                speech_room_num = xml_extract_text(speech, 'numeroQuarto')
                speech_insertion_num = xml_extract_text(
                    speech, 'numeroInsercao')

                yield [
                    session_code,
                    session_date,
                    session_num,
                    phase_code,
                    phase_desc,
                    speech_speaker_num,
                    speech_speaker_name,
                    speech_speaker_party,
                    speech_speaker_state,
                    speech_started_at,
                    speech_room_num,
                    speech_insertion_num,
                ]
def __parse_deputy_presences(self, root):
    """Yield one presence record per session listed under ``root``.

    Deputy-level fields are read once from ``root``, day-level fields
    once per ``dia`` element, and session-level fields once per
    ``sessao`` element inside each day.

    Yields:
        10-item tuples: term, congressperson document, congressperson
        name, party, state, date, presence on the day, justification,
        session description and session attendance.
    """
    deputy_fields = (
        xml_extract_text(root, 'legislatura'),
        xml_extract_text(root, 'carteiraParlamentar'),
        # Please note that this name contains the party and state
        xml_extract_text(root, 'nomeParlamentar'),
        xml_extract_text(root, 'siglaPartido'),
        xml_extract_text(root, 'siglaUF'),
    )

    for day_node in root.findall('.//dia'):
        # Day-level values are extracted even when the day has no
        # sessions, exactly as before.
        day_fields = (
            xml_extract_datetime(day_node, 'data'),
            xml_extract_text(day_node, 'frequencianoDia'),
            xml_extract_text(day_node, 'justificativa'),
        )
        record_prefix = deputy_fields + day_fields

        for session_node in day_node.findall('.//sessao'):
            session_fields = (
                xml_extract_text(session_node, 'descricao'),
                xml_extract_text(session_node, 'frequencia'),
            )
            yield record_prefix + session_fields
def _all_start_times(self, pivot, session_dates):
    """Yield the start time of every session found for the given dates.

    For each date, ``self.URL`` is formatted with the date (rendered as
    ``dd/mm/YYYY``) and ``pivot``, the resulting document is downloaded
    and parsed as XML, and one tuple is produced per ``sessaoDia``
    element found anywhere in it. When the ``DEBUG`` environment
    variable equals ``'1'``, each formatted date is printed before it is
    fetched.

    Args:
        pivot: Value substituted into the second placeholder of
            ``self.URL``.
        session_dates: Iterable of objects exposing ``strftime``.

    Yields:
        ``(date, description, start)`` tuples, where ``description`` is
        read from the ``descricao`` child and ``start`` from the
        ``inicio`` child of each ``sessaoDia`` element.
    """
    for date in session_dates:
        formatted_date = date.strftime("%d/%m/%Y")
        if os.environ.get('DEBUG') == '1':
            print(formatted_date)
        url = self.URL.format(formatted_date, pivot)
        # The document is fully parsed inside the ``with`` block, so the
        # response can be closed before any tuple is handed to the
        # caller; otherwise the connection would be left open (and
        # never explicitly closed) for every date processed.
        with urllib.request.urlopen(url) as response:
            tree = ET.ElementTree(file=response)
        for session in tree.getroot().findall('.//sessaoDia'):
            yield (
                date,
                xml_extract_text(session, 'descricao'),
                xml_extract_datetime(session, 'inicio'),
            )
def _parse_deputies(self, root):
    """Yield a 12-item tuple of text fields for each child of ``root``.

    The fields are read, in order, from the child elements named in
    ``field_tags`` below.
    """
    # Order matters: it defines the column order of every yielded tuple.
    field_tags = (
        'ideCadastro',
        'codOrcamento',
        'condicao',
        'matricula',
        'nome',
        'nomeParlamentar',
        'urlFoto',
        'sexo',
        'uf',
        'partido',
        'fone',
        'email',
    )
    for deputy_node in root:
        yield tuple(
            xml_extract_text(deputy_node, tag) for tag in field_tags
        )
def test_extract_text(self):
    # xml_extract_text should return the text of the 'simpleText' node
    # of the sample document held in self.sampleXml.
    actual_text = helpers.xml_extract_text(self.sampleXml, 'simpleText')
    wanted_text = 'Sample text'
    self.assertEqual(wanted_text, actual_text)