예제 #1
0
 def _all_start_times(self, pivot, session_dates):
     """Yield the start time of every session listed for the given dates.

     For each date, ``self.URL`` is formatted with the date (rendered as
     ``dd/mm/YYYY``) and ``pivot``, the resulting URL is fetched, and the
     response is parsed as XML.

     Args:
         pivot: second positional value substituted into ``self.URL``.
         session_dates: iterable of date-like objects exposing ``strftime``.

     Yields:
         One ``(date, description, start)`` tuple per ``sessaoDia`` element,
         where the description comes from the ``descricao`` child (via
         ``xml_extract_text``) and the start from the ``inicio`` child (via
         ``xml_extract_datetime``).
     """
     for date in session_dates:
         formatted_date = date.strftime("%d/%m/%Y")
         log.debug(formatted_date)
         # Parse the document while the response is open and close it right
         # away: the consumer of this generator may pause between items, and
         # the HTTP connection must not stay open (or leak) in the meantime.
         with urllib.request.urlopen(
                 self.URL.format(formatted_date, pivot)) as response:
             tree = ET.ElementTree(file=response)
         for session in tree.getroot().findall('.//sessaoDia'):
             yield (date, xml_extract_text(session, 'descricao'),
                    xml_extract_datetime(session, 'inicio'))
예제 #2
0
 def __all_start_times(self, pivot, session_dates):
     """Yield the start time of every session listed for the given dates.

     For each date, ``self.URL`` is formatted with the date (rendered as
     ``dd/mm/YYYY``) and ``pivot``, the resulting URL is fetched, and the
     response is parsed as XML. When the ``DEBUG`` environment variable is
     ``'1'``, each formatted date is printed before it is fetched.

     Args:
         pivot: second positional value substituted into ``self.URL``.
         session_dates: iterable of date-like objects exposing ``strftime``.

     Yields:
         One ``(date, description, start)`` tuple per ``sessaoDia`` element,
         where the description comes from the ``descricao`` child (via
         ``xml_extract_text``) and the start from the ``inicio`` child (via
         ``xml_extract_datetime``).
     """
     for date in session_dates:
         formatted_date = date.strftime("%d/%m/%Y")
         if os.environ.get('DEBUG') == '1':
             print(formatted_date)
         # Parse the document while the response is open and close it right
         # away: the consumer of this generator may pause between items, and
         # the HTTP connection must not stay open (or leak) in the meantime.
         with urllib.request.urlopen(
                 self.URL.format(formatted_date, pivot)) as response:
             t = ET.ElementTree(file=response)
         for session in t.getroot().findall('.//sessaoDia'):
             yield (date, xml_extract_text(session, 'descricao'),
                    xml_extract_datetime(session, 'inicio'))
 def _all_start_times(self, pivot, session_dates):
     """Yield the start time of every session listed for the given dates.

     For each date, ``self.URL`` is formatted with the date (rendered as
     ``dd/mm/YYYY``) and ``pivot``, the resulting URL is fetched, and the
     response is parsed as XML. When the ``DEBUG`` environment variable is
     ``'1'``, each formatted date is printed before it is fetched.

     Args:
         pivot: second positional value substituted into ``self.URL``.
         session_dates: iterable of date-like objects exposing ``strftime``.

     Yields:
         One ``(date, description, start)`` tuple per ``sessaoDia`` element,
         where the description comes from the ``descricao`` child (via
         ``xml_extract_text``) and the start from the ``inicio`` child (via
         ``xml_extract_datetime``).
     """
     for date in session_dates:
         formatted_date = date.strftime("%d/%m/%Y")
         if os.environ.get('DEBUG') == '1':
             print(formatted_date)
         url = self.URL.format(formatted_date, pivot)
         # Parse the document while the response is open and close it right
         # away: the consumer of this generator may pause between items, and
         # the HTTP connection must not stay open (or leak) in the meantime.
         with urllib.request.urlopen(url) as response:
             t = ET.ElementTree(file=response)
         for session in t.getroot().findall('.//sessaoDia'):
             yield (
                 date,
                 xml_extract_text(session, 'descricao'),
                 xml_extract_datetime(session, 'inicio')
             )
    def __parse_deputy_presences(self, root):
        """Yield one presence row per session found under ``root``.

        Each row is a 10-tuple: five values read once from ``root``
        (``legislatura``, ``carteiraParlamentar``, ``nomeParlamentar``,
        ``siglaPartido``, ``siglaUF``), three values read from the enclosing
        ``dia`` element (``data``, ``frequencianoDia``, ``justificativa``) and
        two read from the ``sessao`` element itself (``descricao``,
        ``frequencia``).
        """
        legislature = xml_extract_text(root, 'legislatura')
        document = xml_extract_text(root, 'carteiraParlamentar')
        # Please note that this name contains the party and state
        full_name = xml_extract_text(root, 'nomeParlamentar')
        deputy_fields = (
            legislature,
            document,
            full_name,
            xml_extract_text(root, 'siglaPartido'),
            xml_extract_text(root, 'siglaUF'),
        )

        for day_node in root.findall('.//dia'):
            # Day-level values are read once and shared by all of the day's
            # sessions.
            row_prefix = deputy_fields + (
                xml_extract_datetime(day_node, 'data'),
                xml_extract_text(day_node, 'frequencianoDia'),
                xml_extract_text(day_node, 'justificativa'),
            )
            for session_node in day_node.findall('.//sessao'):
                session_desc = xml_extract_text(session_node, 'descricao')
                session_presence = xml_extract_text(session_node, 'frequencia')
                yield row_prefix + (session_desc, session_presence)
    def _parse_speeches(self, root):
        """Yield one row (a list) per speech found under ``root``.

        ``root`` is iterated as a sequence of session elements. For each
        session, the children of its ``fasesSessao`` element are treated as
        phases, and the children of each phase's ``discursos`` element as
        speeches.

        Sessions without a ``fasesSessao`` child and phases without a
        ``discursos`` child are skipped. A speech whose ``horaInicioDiscurso``
        cannot be parsed (``ValueError``) is reported with a printed warning
        and skipped.

        Yields:
            ``[session_code, session_date, session_num, phase_code,
            phase_desc, speaker_num, speaker_name, speaker_party,
            speaker_state, started_at, room_num, insertion_num]``
        """
        for session in root:
            session_code = xml_extract_text(session, 'codigo')
            session_date = xml_extract_date(session, 'data')
            session_num = xml_extract_text(session, 'numero')

            phases = session.find('fasesSessao')
            if phases is None:
                # find() returns None for a missing child; iterating it would
                # raise TypeError and abort the whole parse.
                continue

            for phase in phases:
                phase_code = xml_extract_text(phase, 'codigo')
                phase_desc = xml_extract_text(phase, 'descricao')

                speeches = phase.find('discursos')
                if speeches is None:
                    # Same guard as above: a phase without speeches yields
                    # nothing.
                    continue

                for speech in speeches:
                    speech_speaker_num = xml_extract_text(speech, 'orador/numero')
                    speech_speaker_name = xml_extract_text(speech, 'orador/nome')
                    speech_speaker_party = xml_extract_text(speech, 'orador/partido')
                    speech_speaker_state = xml_extract_text(speech, 'orador/uf')

                    try:
                        speech_started_at = xml_extract_datetime(speech, 'horaInicioDiscurso')
                    except ValueError as ve:
                        # Best effort: report the unparsable start time and
                        # move on to the next speech.
                        print("WARNING: Error parsing speech start time for {} - {}/{} on {}\n{}".format(
                            speech_speaker_name,
                            speech_speaker_party,
                            speech_speaker_state,
                            session_date,
                            ve))
                        continue

                    speech_room_num = xml_extract_text(speech, 'numeroQuarto')
                    speech_insertion_num = xml_extract_text(speech, 'numeroInsercao')

                    yield [
                        session_code,
                        session_date,
                        session_num,
                        phase_code,
                        phase_desc,
                        speech_speaker_num,
                        speech_speaker_name,
                        speech_speaker_party,
                        speech_speaker_state,
                        speech_started_at,
                        speech_room_num,
                        speech_insertion_num
                    ]
예제 #6
0
    def test_extract_datetime_supports_custom_format(self):
        """An explicit format string is honoured when parsing the element."""
        us_format = '%m/%d/%Y %I:%M:%S%p'
        actual = helpers.xml_extract_datetime(
            self.sampleXml, 'usDateTime', us_format)

        self.assertEqual(datetime(2017, 4, 30, 23, 59, 59), actual)
예제 #7
0
    def test_extract_datetime_default_to_br_format(self):
        """With no explicit format, the ``brDateTime`` sample parses to 31 May 2017 23:59:59."""
        actual = helpers.xml_extract_datetime(self.sampleXml, 'brDateTime')

        self.assertEqual(datetime(2017, 5, 31, 23, 59, 59), actual)