def update_legislators(self):
    """Register legislators listed on the source page that are still unknown.

    The page returned by ``self.retrieve_legislators()`` must contain an
    element named ``COD_ORGAO`` whose ``<option>`` children carry the
    legislator name as text and a numeric identifier as ``value``.

    Every listed legislator that has no mandate in ``self.legislature`` yet
    gets one: an existing ``Legislator`` row (matched by ``original_id``) is
    reused through ``self.mandate_for_legislator``; otherwise a new
    ``Legislator`` and a ``Mandate`` with no party are created.
    """
    page = self.retrieve_legislators()

    # We ignore the first one because it is a placeholder.
    options = page(attrs={'name': 'COD_ORGAO'})[0].findAll('option')[1:]

    # Turn the soup objects into a list of dictionaries
    legislators = [
        dict(name=' '.join(word.title() for word in item.getText().split()),
             original_id=int(item.get('value')))
        for item in options
    ]

    # Source identifiers of legislators already holding a mandate in this
    # legislature. The scraped value is the source's id, so it has to be
    # compared with ``original_id`` rather than with the database primary
    # key. A set gives constant-time membership tests and removes any
    # duplicates produced by the join on mandates.
    known_original_ids = set(
        known.original_id
        for known in Legislator.objects.filter(
            mandate__legislature=self.legislature).all()
    )

    # Add legislators that do not exist yet
    for entry in legislators:
        if entry['original_id'] in known_original_ids:
            continue

        try:
            legislator = Legislator.objects.get(
                original_id=entry['original_id'])
        except Legislator.DoesNotExist:
            legislator = Legislator(name=entry['name'],
                                    original_id=entry['original_id'])
            legislator.save()

            mandate = Mandate(legislator=legislator,
                              date_start=self.legislature.date_start,
                              party=None,
                              legislature=self.legislature)
            mandate.save()
            self.debug("New legislator found: %s" % unicode(legislator))
        else:
            self.debug("Found existing legislator: %s" % unicode(legislator))
            # Known person, but without a mandate in this legislature yet.
            self.mandate_for_legislator(legislator, None)
def mandate_for_legislator(self, legislator, party):
    """Return the legislator's mandate for the current legislature.

    The mandate is looked up by legislator and by the start date of
    ``self.legislature``. When none exists, one is created with the given
    ``party``, saved, and reported through ``self.debug``.
    """
    start = self.legislature.date_start

    try:
        return Mandate.objects.get(legislator=legislator, date_start=start)
    except Mandate.DoesNotExist:
        pass

    created = Mandate(legislator=legislator,
                      date_start=start,
                      party=party,
                      legislature=self.legislature)
    created.save()
    self.debug("Mandate starting on %s did not exist, created." %
               start.strftime("%F"))
    return created
def mandate_for_legislator(self, legislator, party, state=None, original_id=None):
    """Return (and memoise) the legislator's mandate for the current legislature.

    Results are kept in ``self.mandates_cache`` keyed by the full argument
    tuple. On a cache miss the mandate is looked up by legislator and by the
    start date of ``self.legislature``; if absent it is created with the
    given ``party`` and ``state``. A truthy ``original_id`` is written to the
    mandate and saved, whether it was found or newly created.
    """
    cache = self.mandates_cache
    key = (legislator, party, state, original_id)
    if key in cache:
        return cache[key]

    start = self.legislature.date_start
    try:
        found = Mandate.objects.get(legislator=legislator, date_start=start)
    except Mandate.DoesNotExist:
        found = Mandate(legislator=legislator,
                        date_start=start,
                        party=party,
                        legislature=self.legislature,
                        state=state)
        found.save()
        self.debug("Mandate starting on %s did not exist, created." %
                   start.strftime("%F"))

    if original_id:
        found.original_id = original_id
        found.save()

    cache[key] = found
    return found
def update_legislators(self):
    """Make sure every legislator on the source page has a mandate here.

    Reads the ``<option>`` entries of the element named ``COD_ORGAO`` in the
    page returned by ``self.retrieve_legislators()``; each option's text is
    the legislator name and its ``value`` a numeric source identifier.

    Legislators already linked to ``self.legislature`` are skipped. For the
    others, an existing ``Legislator`` (matched by ``original_id``) receives
    a mandate via ``self.mandate_for_legislator``; unknown ones are created
    together with a party-less ``Mandate``.
    """
    page = self.retrieve_legislators()

    # We ignore the first one because it is a placeholder.
    options = page(attrs={'name': 'COD_ORGAO'})[0].findAll('option')[1:]

    # Turn the soup objects into a list of dictionaries
    legislators = []
    for item in options:
        name = ' '.join(part.title() for part in item.getText().split())
        legislators.append(
            dict(name=name, original_id=int(item.get('value'))))

    # Obtain the source ids that already have a mandate in this legislature.
    # These must be ``original_id`` values: the scraped ids come from the
    # source site and are unrelated to database primary keys.
    existing_ids = set(
        row.original_id
        for row in Legislator.objects.filter(
            mandate__legislature=self.legislature).all()
    )

    # Add legislators that do not exist yet
    for scraped in legislators:
        source_id = scraped['original_id']
        if source_id in existing_ids:
            continue

        try:
            legislator = Legislator.objects.get(original_id=source_id)
        except Legislator.DoesNotExist:
            legislator = Legislator(name=scraped['name'],
                                    original_id=source_id)
            legislator.save()

            mandate = Mandate(legislator=legislator,
                              date_start=self.legislature.date_start,
                              party=None,
                              legislature=self.legislature)
            mandate.save()
            self.debug("New legislator found: %s" % unicode(legislator))
        else:
            self.debug("Found existing legislator: %s" % unicode(legislator))
            # The person is known but lacks a mandate in this legislature.
            self.mandate_for_legislator(legislator, None)
def mandate_for_legislator(self, legislator, party, state=None, original_id=None):
    """Return the legislator's mandate for the current legislature.

    The mandate is looked up by legislator and by the start date of
    ``self.legislature``; when missing, it is created and saved.

    Args:
        legislator: legislator the mandate belongs to.
        party: party stored on a newly created mandate.
        state: optional state stored on a newly created mandate.
        original_id: optional source identifier; when truthy it is written
            to the mandate (found or created) and the mandate is saved.

    Returns:
        The existing or newly created ``Mandate``.
    """
    try:
        mandate = Mandate.objects.get(
            legislator=legislator,
            date_start=self.legislature.date_start)
    except Mandate.DoesNotExist:
        fields = dict(legislator=legislator,
                      date_start=self.legislature.date_start,
                      party=party,
                      legislature=self.legislature)
        # ``state`` used to be accepted and then dropped. It is forwarded
        # only when supplied, so calls relying on the default are unchanged.
        if state is not None:
            fields['state'] = state
        mandate = Mandate(**fields)
        mandate.save()
        self.debug("Mandate starting on %s did not exist, created." %
                   self.legislature.date_start.strftime("%F"))

    if original_id:
        mandate.original_id = original_id
        mandate.save()

    return mandate
def process_legislators(self, legislature):
    """Scrape the legislator index and sync picture, mandate and party.

    For every link to a legislator page found in the document returned by
    ``self.retrieve_legislators()``:

    * the legislator is registered through ``self.add_legislator``;
    * the portrait, when present, is downloaded and stored on
      ``legislator.picture``;
    * a ``Mandate`` for ``legislature`` is fetched or created;
    * the party siglum is read from the text between parentheses next to
      the party image and stored on the mandate.

    Args:
        legislature: legislature the mandates are attached to; its
            ``date_start`` is used as the mandate start date.
    """
    legislators = self.retrieve_legislators()
    if not legislators:
        return

    # Raw string: ``\?`` is a regex escape, not a Python string escape.
    links = legislators.findAll(
        'a', href=re.compile(r'^vereador_joomla2.asp\?vereador='))

    for link in links:
        href = link.get('href')
        html_legislator = self.retrieve_legislator(href)
        if not html_legislator:
            continue

        name = html_legislator.find(id='nome_vereador').getText()
        legislator = self.add_legislator(name)

        legislator_img = html_legislator.find(
            'img', src=re.compile('imgs/fotos/'))
        if legislator_img:
            legislator_img_src = legislator_img.get('src')
            legislator_img_url = 'http://www1.camara.sp.gov.br/%s' % (
                legislator_img_src)
            result = urllib.urlretrieve(legislator_img_url)
            # The image is binary data: open it as such and make sure the
            # handle is closed once the storage backend has copied it.
            with open(result[0], 'rb') as picture_file:
                legislator.picture.save(
                    os.path.basename(legislator_img_url),
                    File(picture_file))
            legislator.save()
            self.debug('Updating legislator picture.')

        try:
            mandate = Mandate.objects.get(
                legislator=legislator,
                date_start=legislature.date_start,
                legislature=legislature)
            self.debug(u'Found existing Mandate: %s' % mandate)
        except Mandate.DoesNotExist:
            mandate = Mandate(
                legislator=legislator,
                date_start=legislature.date_start,
                legislature=legislature)
            mandate.save()
            self.debug(u'New Mandate found: %s' % mandate)

        # The party label lives in a <font size="2"> two levels above the
        # party logo; the siglum is the text between the parentheses.
        party_img = html_legislator.find(
            'img', src=re.compile('imgs/Partidos'))
        party_label = party_img.parent.parent.find('font', size='2')
        party_name = party_label.getText()
        party_siglum = party_name[party_name.find('(') + 1:
                                  party_name.find(')')]

        if 'Vereadores Licenciados' not in party_siglum:
            party_siglum = self._normalize_party_siglum(party_siglum)

        party = PoliticalParty.objects.get_or_create(siglum=party_siglum)[0]
        mandate.party = party
        mandate.save()
        self.debug('Updating legislator party: %s' % party_siglum)
def process_expenses_obsolete(self, month, year, legislature, collection_run):
    """Import expenses from the nested per-legislator report format.

    The document returned by ``self.retrieve_expenses_obsolete(month, year)``
    is walked as ``g_deputado`` -> ``g_tipo_despesa`` -> ``g_beneficiario``.
    For each beneficiary entry an ``ArchivedExpense`` is saved, creating the
    legislator, mandate, expense nature and supplier on demand.

    Args:
        month: month passed to the retrieval call.
        year: year passed to the retrieval call.
        legislature: legislature the mandates belong to.
        collection_run: collection run recorded on every saved expense.
    """
    data = self.retrieve_expenses_obsolete(month, year)
    if not data:
        return

    # Expense-type rows that must not be imported. Matching is done on the
    # lower-cased text: the label is capitalized first, so an upper-case
    # marker such as u'TOTAL' could never match and a bare "Total" row
    # would otherwise be imported as a regular expense nature.
    ignore_markers = (u'total', u'utilizado até 30/11/07')

    for x in data.findAll('g_deputado'):
        name = x.find('nm_deputado').getText().capitalize()
        legislator = self.add_legislator(name)

        try:
            mandate = Mandate.objects.get(
                legislator=legislator,
                date_start=legislature.date_start,
                legislature=legislature)
            self.debug(u'Found existing Mandate: %s' % mandate)
        except Mandate.DoesNotExist:
            mandate = Mandate(
                legislator=legislator,
                date_start=legislature.date_start,
                legislature=legislature)
            mandate.save()
            self.debug(u'New Mandate found: %s' % mandate)

        expense_type = x.find('list_g_tipo_despesa')
        for i in expense_type.findAll('g_tipo_despesa'):
            nature_text = i.find('nm_tipo_despesa').getText()
            # Labels may be prefixed with "<something> - "; keep what
            # follows the first dash when there is one.
            try:
                nature_text = nature_text.split('-', 1)[1].strip()
            except IndexError:
                pass
            nature_text = nature_text.capitalize()

            lowered = nature_text.lower()
            if any(marker in lowered for marker in ignore_markers):
                continue

            nature, nature_created = ExpenseNature.objects.get_or_create(
                name=nature_text)
            if nature_created:
                self.debug(u'New ExpenseNature found: %s' % nature)
            else:
                self.debug(u'Found existing ExpenseNature: %s' % nature)

            m_month = i.find('nr_mes_ref').getText()
            m_year = i.find('nr_ano_ref').getText()
            date = parse_cmsp_date(m_month, m_year)

            for j in i.findAll('g_beneficiario'):
                supplier_name = j.find('nm_beneficiario').getText()
                supplier_name = supplier_name.capitalize()
                cnpj = self.normalize_cnpj_or_cpf(
                    j.find('nr_cnpj').getText())

                # Nothing identifies the supplier: skip the entry.
                if not cnpj and not supplier_name:
                    continue

                try:
                    supplier = Supplier.objects.get(identifier=cnpj)
                    supplier_created = False
                except Supplier.DoesNotExist:
                    supplier = Supplier(identifier=cnpj, name=supplier_name)
                    supplier.save()
                    supplier_created = True

                if supplier_created:
                    self.debug(u'New Supplier found: %s' % supplier)
                else:
                    self.debug(u'Found existing supplier: %s' % supplier)

                expensed = parse_money(j.find('vl_desp').getText())

                expense = ArchivedExpense(number='None',
                                          nature=nature,
                                          date=date,
                                          expensed=expensed,
                                          mandate=mandate,
                                          supplier=supplier,
                                          collection_run=collection_run)
                expense.save()
                self.debug(u'New expense found: %s' % expense)
def process_expenses(self, month, year, legislature, collection_run):
    """Import expenses from the flat reimbursement report format.

    Years before 2015 are delegated to ``self.process_expenses_obsolete``.
    For later years only the most recent file of a year is processed (the
    current month for the current year, December for past years); every
    ``tabelaportalitemreembolso`` entry of that file becomes an
    ``ArchivedExpense``, with legislator, mandate, expense nature and
    supplier created on demand.
    """
    if year < 2015:
        return self.process_expenses_obsolete(
            month, year, legislature, collection_run)

    # CMSP now puts all data year to date on each file, so we need to get
    # only the last one for a given year - otherwise we duplicate data.
    now = datetime.now()
    superseded_this_year = year == now.year and month < now.month
    if superseded_this_year or (year < now.year and month < 12):
        return

    data = self.retrieve_expenses(month, year)
    if not data:
        return

    for row in data.findAll('tabelaportalitemreembolso'):
        legislator = self.add_legislator(
            row.find('vereador').getText().capitalize())

        try:
            mandate = Mandate.objects.get(
                legislator=legislator,
                date_start=legislature.date_start,
                legislature=legislature)
        except Mandate.DoesNotExist:
            mandate = Mandate(
                legislator=legislator,
                date_start=legislature.date_start,
                legislature=legislature)
            mandate.save()
            self.debug(u'New Mandate found: %s' % mandate)
        else:
            self.debug(u'Found existing Mandate: %s' % mandate)

        # Keep only what follows the first dash, when the label has one.
        label = row.find('despesa').getText()
        _prefix, dash, remainder = label.partition('-')
        if dash:
            label = remainder.strip()
        label = label.capitalize()

        nature, is_new_nature = ExpenseNature.objects.get_or_create(
            name=label)
        if is_new_nature:
            self.debug(u'New ExpenseNature found: %s' % nature)
        else:
            self.debug(u'Found existing ExpenseNature: %s' % nature)

        date = parse_cmsp_date(row.find('mes').getText(),
                               row.find('ano').getText())

        supplier_name = row.find('fornecedor').getText().capitalize()
        cnpj = self.normalize_cnpj_or_cpf(row.find('cnpj').getText())

        # Entries with neither identifier nor name cannot be attributed.
        if not (cnpj or supplier_name):
            continue

        try:
            supplier = Supplier.objects.get(identifier=cnpj)
        except Supplier.DoesNotExist:
            supplier = Supplier(identifier=cnpj, name=supplier_name)
            supplier.save()
            self.debug(u'New Supplier found: %s' % supplier)
        else:
            self.debug(u'Found existing supplier: %s' % supplier)

        amount = float(row.find('valor').getText())

        expense = ArchivedExpense(number='None',
                                  nature=nature,
                                  date=date,
                                  expensed=amount,
                                  mandate=mandate,
                                  supplier=supplier,
                                  collection_run=collection_run)
        expense.save()
        self.debug(u'New expense found: %s' % expense)