def retrieve_legislators(self, url):
    """Download *url* and parse the page with ``rows.import_from_html``.

    The page is fetched through ``BaseCollector.retrieve_uri`` with
    ``post_process=False`` and ``force_encoding='utf-8'``. The returned
    text is encoded back to UTF-8 bytes and wrapped in a ``BytesIO`` so
    it can be handed to ``rows.import_from_html`` with
    ``preserve_html=True``.

    :param url: address of the page listing the legislators.
    :return: whatever ``rows.import_from_html`` returns for the page.
    """
    page = BaseCollector.retrieve_uri(
        self,
        url,
        post_process=False,
        force_encoding='utf-8',
    )
    stream = BytesIO(page.encode('utf-8'))
    return rows.import_from_html(stream, preserve_html=True)
def retrieve_data_for_year(self, year):
    """Download the senado.gov.br ``verba`` CSV for *year*.

    The URL is logged through ``self.debug`` before the request. The
    download goes through ``BaseCollector.retrieve_uri`` with
    ``force_encoding='windows-1252'`` and ``post_process=False``.

    :param year: value interpolated into the CSV file name.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    template = 'http://www.senado.gov.br/transparencia/LAI/verba/{0}.csv'
    csv_url = template.format(year)

    message = u'Downloading {0}'.format(csv_url)
    self.debug(message)

    return BaseCollector.retrieve_uri(
        self,
        csv_url,
        force_encoding='windows-1252',
        post_process=False,
    )
def retrieve_actual_data(self, code, month, year):
    """Request the ``verba-indenizatoria`` page for one legislator and period.

    :param code: sent as the ``vereador`` field.
    :param month: sent as ``mes``, left-padded with zeros to two characters.
    :param year: sent as ``ano``.
    :return: the result of ``BaseCollector.retrieve_uri`` called with the
        endpoint, the form data and the Origin/Referer headers.
    """
    endpoint = 'https://www.cmbh.mg.gov.br/transparencia/vereadores/verba-indenizatoria'
    padded_month = '{:0>2}'.format(month)
    payload = {
        'codVereadorVI': '',
        'mes': padded_month,
        'ano': year,
        'vereador': code,
    }
    request_headers = {
        'Origin': 'https://www.cmbh.mg.gov.br',
        'Referer': 'https://www.cmbh.mg.gov.br/transparencia/verba-indenizatoria',
    }
    return BaseCollector.retrieve_uri(self, endpoint, payload, request_headers)
def retrieve_months(self):
    """Request ``lista_meses.php`` from cmbh.mg.gov.br.

    The request carries ``{'tipo': 'd'}`` as data plus Referer and Origin
    headers pointing at the same site.

    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    endpoint = 'http://www.cmbh.mg.gov.br/extras/verba_indenizatoria_nota_fiscal/lista_meses.php'
    payload = {'tipo': 'd'}
    request_headers = {
        'Referer': 'http://www.cmbh.mg.gov.br/extras/verba_indenizatoria_nota_fiscal/index.php',
        'Origin': 'http://www.cmbh.mg.gov.br',
    }
    return BaseCollector.retrieve_uri(self, endpoint, payload, request_headers)
def retrieve_legislators(self, month):
    """Request ``oracle_lista_vereadores.php`` for the given *month*.

    The Referer header points at ``lista_meses.php``.

    :param month: sent as the ``mes`` field.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    endpoint = 'http://www.cmbh.mg.gov.br/extras/verba_indenizatoria_nota_fiscal/oracle_lista_vereadores.php'
    payload = {'mes': month}
    request_headers = {
        'Referer': 'http://www.cmbh.mg.gov.br/extras/verba_indenizatoria_nota_fiscal/lista_meses.php',
        'Origin': 'http://www.cmbh.mg.gov.br',
    }
    return BaseCollector.retrieve_uri(self, endpoint, payload, request_headers)
def retrieve_data_for_year(self, legislator, year):
    """Request ``VerbaMes.asp`` for one legislator and year.

    :param legislator: object whose ``original_id`` is sent as ``COD_ORGAO``.
    :param year: sent as ``ANO_EXERCICIO``.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    endpoint = 'http://www.senado.gov.br/transparencia/sen/verba/VerbaMes.asp'
    payload = {
        'COD_ORGAO': legislator.original_id,
        'ANO_EXERCICIO': year,
    }
    request_headers = {
        'Referer': 'http://www.senado.gov.br/transparencia/sen/verba/verbaAno.asp',
        'Origin': 'http://www.senado.gov.br',
    }
    return BaseCollector.retrieve_uri(self, endpoint, payload, request_headers)
def retrieve_expense_types(self, month, legislator, code):
    """Request ``oracle_lista_tipodespesa.php`` for a legislator and month.

    :param month: sent as the ``mes`` field.
    :param legislator: sent as the ``vereador`` field.
    :param code: sent as the ``cod`` field.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    endpoint = 'http://www.cmbh.mg.gov.br/extras/verba_indenizatoria_nota_fiscal/oracle_lista_tipodespesa.php'
    payload = {
        'mes': month,
        'vereador': legislator,
        'cod': code,
    }
    request_headers = {
        'Referer': 'http://www.cmbh.mg.gov.br/extras/verba_indenizatoria_nota_fiscal/oracle_lista_vereadores.php',
        'Origin': 'http://www.cmbh.mg.gov.br',
    }
    return BaseCollector.retrieve_uri(self, endpoint, payload, request_headers)
def retrieve_legislators(self, month):
    """Request ``oracle_lista_vereadores.php`` for the given *month*.

    The Referer header points at ``index.php``.

    :param month: sent as the ``mes`` field.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    site = 'http://www.cmbh.mg.gov.br'
    referer = 'http://www.cmbh.mg.gov.br/extras/verba_indenizatoria_nota_fiscal/index.php'
    target = 'http://www.cmbh.mg.gov.br/extras/verba_indenizatoria_nota_fiscal/oracle_lista_vereadores.php'
    form = {'mes': month}
    extra_headers = {'Referer': referer, 'Origin': site}
    return BaseCollector.retrieve_uri(self, target, form, extra_headers)
def retrieve_data_for_year(self, year):
    """Download the senado.gov.br ``verba`` CSV for *year*.

    *year* is interpolated with ``%d``. The URL is logged through
    ``self.debug`` and then fetched via ``BaseCollector.retrieve_uri``
    with ``force_encoding='windows-1252'`` and ``post_process=False``.

    :param year: numeric year used to build the CSV file name.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    csv_url = 'http://www.senado.gov.br/transparencia/LAI/verba/%d.csv' % year
    log_line = "Downloading %s" % csv_url
    self.debug(log_line)
    fetch_options = {
        'force_encoding': 'windows-1252',
        'post_process': False,
    }
    return BaseCollector.retrieve_uri(self, csv_url, **fetch_options)
def retrieve_data_for_year(self, year):
    """Fetch the yearly ``verba`` CSV published under senado.gov.br.

    Logs the URL with ``self.debug`` and delegates the download to
    ``BaseCollector.retrieve_uri``, forcing ``windows-1252`` and passing
    ``post_process=False``.

    :param year: value substituted into the ``{0}.csv`` file name.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    location = 'http://www.senado.gov.br/transparencia/LAI/verba/{0}.csv'.format(year)
    self.debug(u'Downloading {0}'.format(location))
    return BaseCollector.retrieve_uri(
        self,
        location,
        force_encoding='windows-1252',
        post_process=False,
    )
def retrieve_data_for_year(self, legislator, year):
    """Fetch ``VerbaMes.asp`` for the given legislator and year.

    :param legislator: object providing ``original_id`` (sent as
        ``COD_ORGAO``).
    :param year: sent as ``ANO_EXERCICIO``.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    target = 'http://www.senado.gov.br/transparencia/sen/verba/VerbaMes.asp'
    form = {'COD_ORGAO': legislator.original_id, 'ANO_EXERCICIO': year}
    extra_headers = {
        'Referer': 'http://www.senado.gov.br/transparencia/sen/verba/verbaAno.asp',
        'Origin': 'http://www.senado.gov.br',
    }
    return BaseCollector.retrieve_uri(self, target, form, extra_headers)
def update_legislators(self):
    """Create or refresh a Legislator record for every listed legislator.

    The listing page at ``TRANSPARENCIA_URL`` is fetched once. The options
    read from ``selectDep`` (ids) and ``field-deputados`` (urls) are
    combined with ``merge_lists(..., 'key')``. For each resulting entry:

    * entries without a ``url`` are logged and skipped;
    * the legislator page (``ALEPE_URL`` + url) is fetched and its name,
      party siglum, resume and picture are read from the markup;
    * the ``Legislator`` is fetched or created by name, updated and saved;
    * the ``PoliticalParty`` is fetched or created by siglum;
    * ``self.mandate_for_legislator`` is called with the entry ``id`` as
      ``original_id``.
    """
    listing = BaseCollector.retrieve_uri(self, TRANSPARENCIA_URL)
    id_options = self._get_options('selectDep', 'id', listing)
    url_options = self._get_options('field-deputados', 'url', listing)
    entries = merge_lists(id_options, url_options, 'key')

    for entry in entries:
        relative_url = entry.get('url')
        if not relative_url:
            missing = u'URL for Legislator {0} with id {1} does not exist'.format(
                entry['name'],
                entry['id'],
            )
            self.debug(missing)
            continue

        url = '{0}{1}'.format(ALEPE_URL, relative_url)
        page = self.retrieve_uri(url)

        # Everything except the picture lives inside the "text" container.
        header = page.find('div', {'class': 'text'})
        name = header.find('h1').text
        party_siglum = header.find('span', {'class': 'subtitle'}).text
        resume = header.find('div', {'class': 'resume'}).text
        image_src = page.find('figure').find('img')['src']
        picture = '{0}{1}'.format(ALEPE_URL, image_src)

        legislator, created = Legislator.objects.get_or_create(name=name)
        if created:
            template = u'New legislator: %s'
        else:
            template = u'Found existing legislator: %s'
        self.debug(template % unicode(legislator))

        legislator.about = resume
        legislator.picture = picture
        # FIXME: email and alternative_names are not populated yet.
        legislator.site = url
        legislator.save()

        party, _ = PoliticalParty.objects.get_or_create(siglum=party_siglum)

        self.mandate_for_legislator(
            legislator,
            party,
            original_id=entry.get('id'),
        )
def retrieve_actual_data(self, code, seq, legislator, nature, month):
    """Request ``oracle_lista_valordespesa.php`` for one expense entry.

    :param code: sent as the ``cod`` field.
    :param seq: sent as the ``seq`` field.
    :param legislator: sent as the ``vereador`` field.
    :param nature: sent as the ``tipodespesa`` field.
    :param month: sent as the ``mes`` field.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    endpoint = 'http://www.cmbh.mg.gov.br/extras/verba_indenizatoria_nota_fiscal/oracle_lista_valordespesa.php'
    payload = {
        'cod': code,
        'seq': seq,
        'vereador': legislator,
        'tipodespesa': nature,
        'mes': month,
    }
    request_headers = {
        'Referer': 'http://www.cmbh.mg.gov.br/extras/verba_indenizatoria_nota_fiscal/oracle_lista_tipodespesa.php',
        'Origin': 'http://www.cmbh.mg.gov.br',
    }
    return BaseCollector.retrieve_uri(self, endpoint, payload, request_headers)
def update_legislators(self):
    """Synchronise Legislator records with the listing at TRANSPARENCIA_URL.

    Steps, per entry produced by ``merge_lists`` over the ``selectDep``
    (id) and ``field-deputados`` (url) options:

    1. Skip, with a debug message, any entry that has no ``url``.
    2. Fetch ``ALEPE_URL`` + url and read name, party siglum, resume and
       picture source from the page.
    3. Fetch or create the ``Legislator`` by name, set ``about``,
       ``picture`` and ``site``, then save it.
    4. Fetch or create the ``PoliticalParty`` by siglum.
    5. Call ``self.mandate_for_legislator`` with the entry ``id`` as
       ``original_id``.
    """
    index_page = BaseCollector.retrieve_uri(self, TRANSPARENCIA_URL)
    merged = merge_lists(
        self._get_options('selectDep', 'id', index_page),
        self._get_options('field-deputados', 'url', index_page),
        'key',
    )

    for item in merged:
        path = item.get('url')
        if not path:
            self.debug(u'URL for Legislator {0} with id {1} does not exist'.format(
                item['name'], item['id']))
            continue

        profile_url = '{0}{1}'.format(ALEPE_URL, path)
        profile = self.retrieve_uri(profile_url)

        text_block = profile.find('div', {'class': 'text'})
        full_name = text_block.find('h1').text
        siglum = text_block.find('span', {'class': 'subtitle'}).text
        about_text = text_block.find('div', {'class': 'resume'}).text
        picture_url = '{0}{1}'.format(
            ALEPE_URL,
            profile.find('figure').find('img')['src'],
        )

        legislator, is_new = Legislator.objects.get_or_create(name=full_name)
        if not is_new:
            self.debug(u'Found existing legislator: %s' % unicode(legislator))
        else:
            self.debug(u'New legislator: %s' % unicode(legislator))

        legislator.about = about_text
        legislator.picture = picture_url
        # FIXME: email and alternative_names are not populated yet.
        legislator.site = profile_url
        legislator.save()

        party, _ = PoliticalParty.objects.get_or_create(siglum=siglum)
        original_id = item.get('id')
        self.mandate_for_legislator(legislator, party, original_id=original_id)
def retrieve_expenses(self, month, year):
    """Download the SAEG XML file named ``<year><month>.xml``.

    *month* is interpolated as given (no padding is applied here).

    :param month: second component of the file name.
    :param year: first component of the file name.
    :return: the result of ``BaseCollector.retrieve_uri`` with
        ``force_encoding='utf-8'``.
    """
    period = (year, month)
    xml_url = 'http://www2.camara.sp.gov.br/SAEG/%s%s.xml' % period
    return BaseCollector.retrieve_uri(self, xml_url, force_encoding='utf-8')
def retrieve_legislators(self):
    """Fetch the senado.gov.br ``transparencia`` landing page.

    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    return BaseCollector.retrieve_uri(
        self,
        'http://www.senado.gov.br/transparencia/',
    )
def retrieve_expenses(self, month, year):
    """Download the SAEG XML file named ``<year><month>.xml``.

    No encoding is forced; *month* is interpolated as given.

    :param month: second component of the file name.
    :param year: first component of the file name.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    file_url = 'http://www2.camara.sp.gov.br/SAEG/%s%s.xml' % (year, month)
    response = BaseCollector.retrieve_uri(self, file_url)
    return response
def retrieve_legislators(self):
    """Call the ``ObterDeputados`` operation of the camara.gov.br web service.

    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    return BaseCollector.retrieve_uri(
        self,
        'http://www.camara.gov.br/SitCamaraWS/Deputados.asmx/ObterDeputados',
    )
def retrieve_legislators(self):
    """Fetch ``vereadores_joomla.asp`` from www1.camara.sp.gov.br.

    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    listing_url = 'http://www1.camara.sp.gov.br/vereadores_joomla.asp'
    listing = BaseCollector.retrieve_uri(self, listing_url)
    return listing
def retrieve_expenses_obsolete(self, month, year):
    """Download ``<year><MM>.XML`` from the ``wp-content`` SAEG folder.

    :param month: numeric month; formatted with ``%02d`` to two digits.
    :param year: first component of the file name.
    :return: the result of ``BaseCollector.retrieve_uri`` with
        ``force_encoding='utf-8'``.
    """
    padded_month = '%02d' % month
    file_url = 'http://www.camara.sp.gov.br/wp-content/uploads/transparencia/saeg/%s%s.XML' % (
        year,
        padded_month,
    )
    return BaseCollector.retrieve_uri(self, file_url, force_encoding='utf-8')
def retrieve_expenses(self, month, year):
    """Download ``<year><MM>.XML`` from the ``sisgv/Arquivos`` folder.

    :param month: numeric month; formatted with ``%02d`` to two digits.
    :param year: first component of the file name.
    :return: the result of ``BaseCollector.retrieve_uri`` with
        ``force_encoding='utf-8'``.
    """
    two_digit_month = '%02d' % month
    name_parts = (year, two_digit_month)
    xml_url = 'http://www2.camara.sp.gov.br/sisgv/Arquivos/%s%s.XML' % name_parts
    return BaseCollector.retrieve_uri(self, xml_url, force_encoding='utf-8')
def retrieve_legislator(self, link):
    """Fetch a single page under ``http://www1.camara.sp.gov.br/``.

    :param link: path appended to the site root.
    :return: the result of ``BaseCollector.retrieve_uri``.
    """
    page_url = 'http://www1.camara.sp.gov.br/%s' % link
    page = BaseCollector.retrieve_uri(self, page_url)
    return page
def retrieve_legislators(self):
    """Fetch the ``transparencia/sen`` page from www25.senado.leg.br.

    :return: the result of ``BaseCollector.retrieve_uri`` with
        ``force_encoding='utf-8'``.
    """
    return BaseCollector.retrieve_uri(
        self,
        'http://www25.senado.leg.br/web/transparencia/sen/',
        force_encoding='utf-8',
    )