Beispiel #1
0
    def update_legislators(self):
        """Make sure every legislator listed on the source page has a mandate.

        The page returned by ``self.retrieve_legislators()`` is expected to
        contain an element named ``COD_ORGAO`` whose ``<option>`` children
        carry the legislator name as text and the source id as ``value``.

        Legislators already attached to ``self.legislature`` are skipped. For
        the others, an existing ``Legislator`` row gets its mandate through
        ``self.mandate_for_legislator``; an unknown one is created together
        with a mandate that starts on the legislature's start date.
        """
        page = self.retrieve_legislators()

        # We ignore the first one because it is a placeholder.
        options = page(attrs={'name': 'COD_ORGAO'})[0].findAll('option')[1:]

        # Turn the soup objects into a list of dictionaries
        legislators = []
        for item in options:
            name = ' '.join([x.title() for x in item.getText().split()])
            original_id = int(item.get('value'))
            legislators.append(dict(name=name, original_id=original_id))

        # Obtain the source ids of the legislators that already have a mandate
        # in this legislature. The comparison below is against the scraped
        # ``original_id``, so the database primary key must not be used here.
        # NOTE(review): ids are compared as text because the field type of
        # ``Legislator.original_id`` is not visible from this method.
        known = Legislator.objects.filter(mandate__legislature=self.legislature)
        existing_ids = set('%s' % x.original_id for x in known)

        # Add legislators that do not exist yet
        for entry in legislators:
            if '%s' % entry['original_id'] in existing_ids:
                continue

            try:
                legislator = Legislator.objects.get(original_id=entry['original_id'])
            except Legislator.DoesNotExist:
                legislator = Legislator(name=entry['name'], original_id=entry['original_id'])
                legislator.save()

                mandate = Mandate(legislator=legislator, date_start=self.legislature.date_start, party=None, legislature=self.legislature)
                mandate.save()

                self.debug("New legislator found: %s" % unicode(legislator))
            else:
                self.debug("Found existing legislator: %s" % unicode(legislator))

                # Creates the mandate for this legislature when it is missing.
                self.mandate_for_legislator(legislator, None)
Beispiel #2
0
 def mandate_for_legislator(self, legislator, party):
     """Return the mandate of ``legislator`` starting with the legislature.

     Looks up the ``Mandate`` of ``legislator`` whose ``date_start`` equals
     ``self.legislature.date_start``. When there is none, a new mandate is
     created for ``self.legislature`` with the given ``party`` and saved.

     ``party`` is only used when a mandate has to be created; an existing
     mandate is returned as stored.
     """
     date_start = self.legislature.date_start

     try:
         mandate = Mandate.objects.get(
             legislator=legislator, date_start=date_start)
     except Mandate.DoesNotExist:
         mandate = Mandate(legislator=legislator,
                           date_start=date_start,
                           party=party,
                           legislature=self.legislature)
         mandate.save()
         # "%F" is a platform-specific strftime extension; the explicit
         # form gives the same YYYY-MM-DD text on every platform.
         self.debug("Mandate starting on %s did not exist, created." %
                    date_start.strftime("%Y-%m-%d"))
     return mandate
Beispiel #3
0
    def mandate_for_legislator(self,
                               legislator,
                               party,
                               state=None,
                               original_id=None):
        """Return (creating it if needed) the legislator's current mandate.

        Results are memoised in ``self.mandates_cache`` under the tuple
        ``(legislator, party, state, original_id)``, so repeated calls with
        the same arguments do not hit the database again.

        On a cache miss the ``Mandate`` of ``legislator`` whose ``date_start``
        equals ``self.legislature.date_start`` is fetched; when there is none,
        one is created for ``self.legislature`` with ``party`` and ``state``.
        ``party`` and ``state`` are not applied to a mandate that already
        exists. A truthy ``original_id`` is stored on the mandate and saved
        in either case.
        """
        cache_key = (legislator, party, state, original_id)
        if cache_key in self.mandates_cache:
            return self.mandates_cache[cache_key]

        date_start = self.legislature.date_start

        try:
            mandate = Mandate.objects.get(
                legislator=legislator, date_start=date_start)
        except Mandate.DoesNotExist:
            mandate = Mandate(legislator=legislator,
                              date_start=date_start,
                              party=party,
                              legislature=self.legislature,
                              state=state)
            mandate.save()
            # "%F" is a platform-specific strftime extension; the explicit
            # form gives the same YYYY-MM-DD text on every platform.
            self.debug("Mandate starting on %s did not exist, created." %
                       date_start.strftime("%Y-%m-%d"))

        if original_id:
            mandate.original_id = original_id
            mandate.save()

        self.mandates_cache[cache_key] = mandate

        return mandate
Beispiel #4
0
    def update_legislators(self):
        """Synchronise the legislators on the source page with the database.

        Reads the ``<option>`` children of the element named ``COD_ORGAO`` on
        the page returned by ``self.retrieve_legislators()``; each option
        supplies a name (its text) and a source id (its ``value``).

        A legislator that already has a mandate in ``self.legislature`` is
        left alone. Otherwise an existing ``Legislator`` row is given a
        mandate via ``self.mandate_for_legislator``, and an unknown
        legislator is created together with a mandate starting on the
        legislature's start date.
        """
        page = self.retrieve_legislators()

        # We ignore the first one because it is a placeholder.
        options = page(attrs={'name': 'COD_ORGAO'})[0].findAll('option')[1:]

        # Turn the soup objects into a list of dictionaries
        legislators = []
        for item in options:
            name = ' '.join([x.title() for x in item.getText().split()])
            original_id = int(item.get('value'))
            legislators.append(dict(name=name, original_id=original_id))

        # Obtain the existing source ids. These are matched against the
        # scraped ``original_id`` below, so the primary key (``id``) would be
        # the wrong attribute to collect.
        # NOTE(review): ids are compared as text because the field type of
        # ``Legislator.original_id`` is not visible from this method.
        existing_ids = set(
            '%s' % x.original_id for x in Legislator.objects.filter(
                mandate__legislature=self.legislature)
        )

        # Add legislators that do not exist yet
        for entry in legislators:
            if '%s' % entry['original_id'] in existing_ids:
                continue

            try:
                legislator = Legislator.objects.get(
                    original_id=entry['original_id'])
            except Legislator.DoesNotExist:
                legislator = Legislator(name=entry['name'],
                                        original_id=entry['original_id'])
                legislator.save()

                mandate = Mandate(legislator=legislator,
                                  date_start=self.legislature.date_start,
                                  party=None,
                                  legislature=self.legislature)
                mandate.save()

                self.debug("New legislator found: %s" % unicode(legislator))
            else:
                self.debug("Found existing legislator: %s" %
                           unicode(legislator))

                # Creates the mandate for this legislature when it is missing.
                self.mandate_for_legislator(legislator, None)
Beispiel #5
0
    def mandate_for_legislator(self, legislator, party, state=None, original_id=None):
        """Return (creating it if needed) the legislator's current mandate.

        Fetches the ``Mandate`` of ``legislator`` whose ``date_start`` equals
        ``self.legislature.date_start``; when there is none, one is created
        for ``self.legislature`` with the given ``party`` and saved. ``party``
        is not applied to a mandate that already exists.

        A truthy ``original_id`` is stored on the mandate and saved, whether
        the mandate was just created or already existed.

        NOTE(review): ``state`` is accepted but never used by this method;
        confirm whether it should be stored on newly created mandates.
        """
        date_start = self.legislature.date_start

        try:
            mandate = Mandate.objects.get(legislator=legislator, date_start=date_start)
        except Mandate.DoesNotExist:
            mandate = Mandate(legislator=legislator, date_start=date_start, party=party,
                              legislature=self.legislature)
            mandate.save()
            # "%F" is a platform-specific strftime extension; the explicit
            # form gives the same YYYY-MM-DD text on every platform.
            self.debug("Mandate starting on %s did not exist, created." % date_start.strftime("%Y-%m-%d"))

        if original_id:
            mandate.original_id = original_id
            mandate.save()

        return mandate
Beispiel #6
0
    def process_legislators(self, legislature):
        """Collect legislators with their picture, mandate and party.

        Follows every ``vereador_joomla2.asp?vereador=`` link on the page
        returned by ``self.retrieve_legislators()``. For each legislator page
        that can be retrieved it:

        * registers the legislator through ``self.add_legislator``;
        * downloads and stores the picture when an ``imgs/fotos/`` image is
          present;
        * fetches or creates the ``Mandate`` for ``legislature``;
        * sets the mandate's party from the siglum found between parentheses
          next to the ``imgs/Partidos`` image, unless that text contains
          ``Vereadores Licenciados``.

        Returns ``None``; does nothing when the listing cannot be retrieved.
        """
        legislators = self.retrieve_legislators()
        if not legislators:
            return

        # Raw string so the ``\?`` escape reaches the regex engine untouched.
        links = legislators.findAll(
            'a',
            href=re.compile(r'^vereador_joomla2.asp\?vereador='))

        for link in links:
            href = link.get('href')
            html_legislator = self.retrieve_legislator(href)
            if not html_legislator:
                continue

            name = html_legislator.find(id='nome_vereador').getText()

            legislator = self.add_legislator(name)

            legislator_img = html_legislator.find(
                'img',
                src=re.compile('imgs/fotos/'))

            if legislator_img:
                legislator_img_src = legislator_img.get('src')

                legislator_img_url = 'http://www1.camara.sp.gov.br/%s' % (
                    legislator_img_src)

                result = urllib.urlretrieve(legislator_img_url)

                # Pictures are binary data; the context manager closes the
                # downloaded file once its content has been stored.
                with open(result[0], 'rb') as picture_file:
                    legislator.picture.save(
                        os.path.basename(legislator_img_url),
                        File(picture_file))

                legislator.save()

                self.debug('Updating legislator picture.')

            try:
                mandate = Mandate.objects.get(
                    legislator=legislator,
                    date_start=legislature.date_start,
                    legislature=legislature)
                self.debug(u'Found existing Mandate: %s' % mandate)
            except Mandate.DoesNotExist:
                mandate = Mandate(
                    legislator=legislator,
                    date_start=legislature.date_start,
                    legislature=legislature)
                mandate.save()
                self.debug(u'New Mandate found: %s' % mandate)

            party_img = html_legislator.find(
                'img',
                src=re.compile('imgs/Partidos'))

            party_label = None
            if party_img is not None:
                party_label = party_img.parent.parent.find('font', size='2')

            if party_label is None:
                # Without the party markup there is nothing to update; keep
                # the mandate as it is instead of failing on ``None``.
                self.debug('No party information found for legislator.')
                continue

            party_name = party_label.getText()
            # The siglum is the text between the first "(" and first ")".
            party_siglum = party_name[party_name.find('(') + 1:party_name.find(')')]

            if 'Vereadores Licenciados' not in party_siglum:
                party_siglum = self._normalize_party_siglum(party_siglum)
                party, party_created = PoliticalParty.objects.get_or_create(
                    siglum=party_siglum)

                mandate.party = party
                mandate.save()
                self.debug('Updating legislator party: %s' % party_siglum)
Beispiel #7
0
    def process_expenses_obsolete(self, month, year, legislature, collection_run):
        """Archive the expenses published in the ``g_deputado`` data layout.

        Retrieves the data for ``month``/``year`` through
        ``self.retrieve_expenses_obsolete`` and, for every ``g_deputado``
        element, registers the legislator, fetches or creates the ``Mandate``
        for ``legislature`` and stores one ``ArchivedExpense`` per
        ``g_beneficiario`` entry, linked to ``collection_run``.

        Expense types whose name contains one of the ignore markers (totals
        and a usage note) are skipped, as are beneficiaries that have
        neither an identifier nor a name.

        Returns ``None``; does nothing when no data could be retrieved.
        """
        data = self.retrieve_expenses_obsolete(month, year)
        if not data:
            return

        # Lower-case markers of rows that are not real expense natures. The
        # match is done on the lower-cased text: the nature name has been
        # through ``capitalize()``, so a case-sensitive test would miss rows
        # such as "Total".
        ignore_list = [u'total', u'utilizado até 30/11/07']

        for x in data.findAll('g_deputado'):
            name = x.find('nm_deputado').getText().capitalize()
            legislator = self.add_legislator(name)

            try:
                mandate = Mandate.objects.get(
                    legislator=legislator,
                    date_start=legislature.date_start,
                    legislature=legislature)
                self.debug(u'Found existing Mandate: %s' % mandate)
            except Mandate.DoesNotExist:
                mandate = Mandate(
                    legislator=legislator,
                    date_start=legislature.date_start,
                    legislature=legislature)
                mandate.save()
                self.debug(u'New Mandate found: %s' % mandate)

            expense_type = x.find('list_g_tipo_despesa')

            for i in expense_type.findAll('g_tipo_despesa'):
                nature_text = i.find('nm_tipo_despesa').getText()
                # Drop the prefix before the first "-" when there is one.
                try:
                    nature_text = nature_text.split('-', 1)[1].strip()
                except IndexError:
                    pass

                nature_text = nature_text.capitalize()

                nature_lowered = nature_text.lower()
                if any(s in nature_lowered for s in ignore_list):
                    continue

                nature, nature_created = ExpenseNature.objects.get_or_create(
                    name=nature_text)

                if nature_created:
                    self.debug(u'New ExpenseNature found: %s' % nature)
                else:
                    self.debug(u'Found existing ExpenseNature: %s' % nature)

                m_month = i.find('nr_mes_ref').getText()
                m_year = i.find('nr_ano_ref').getText()
                date = parse_cmsp_date(m_month, m_year)

                for j in i.findAll('g_beneficiario'):
                    supplier_name = j.find('nm_beneficiario').getText()
                    supplier_name = supplier_name.capitalize()
                    cnpj = self.normalize_cnpj_or_cpf(j.find('nr_cnpj').getText())

                    # Nothing identifies this beneficiary; skip the entry.
                    if not cnpj and not supplier_name:
                        continue

                    try:
                        supplier = Supplier.objects.get(identifier=cnpj)
                        supplier_created = False
                    except Supplier.DoesNotExist:
                        supplier = Supplier(identifier=cnpj, name=supplier_name)
                        supplier.save()
                        supplier_created = True

                    if supplier_created:
                        self.debug(u'New Supplier found: %s' % supplier)
                    else:
                        self.debug(u'Found existing supplier: %s' % supplier)

                    expensed = parse_money(j.find('vl_desp').getText())

                    expense = ArchivedExpense(number='None',
                                              nature=nature,
                                              date=date,
                                              expensed=expensed,
                                              mandate=mandate,
                                              supplier=supplier,
                                              collection_run=collection_run)
                    expense.save()

                    self.debug(u'New expense found: %s' % expense)
Beispiel #8
0
    def process_expenses(self, month, year, legislature, collection_run):
        """Archive the expenses published for ``month``/``year``.

        Years before 2015 are delegated to ``process_expenses_obsolete``.
        For later years only the most recent file of a year is processed
        (the current month for the current year, December for past years);
        any other month returns immediately without retrieving data.

        Each ``tabelaportalitemreembolso`` element of the retrieved data
        yields one ``ArchivedExpense`` tied to the legislator's mandate in
        ``legislature``, its expense nature, its supplier and
        ``collection_run``. Entries with neither a supplier identifier nor a
        supplier name are skipped.
        """
        if year < 2015:
            return self.process_expenses_obsolete(month, year, legislature, collection_run)

        # CMSP now puts all data year to date on each file, so we need to get only the
        # last one for a given year - otherwise we duplicate data.
        now = datetime.now()
        superseded = ((year == now.year and month < now.month) or
                      (year < now.year and month < 12))
        if superseded:
            return

        data = self.retrieve_expenses(month, year)
        if not data:
            return

        for entry in data.findAll('tabelaportalitemreembolso'):
            legislator_name = entry.find('vereador').getText().capitalize()
            legislator = self.add_legislator(legislator_name)

            mandate_lookup = dict(legislator=legislator,
                                  date_start=legislature.date_start,
                                  legislature=legislature)
            try:
                mandate = Mandate.objects.get(**mandate_lookup)
                self.debug(u'Found existing Mandate: %s' % mandate)
            except Mandate.DoesNotExist:
                mandate = Mandate(**mandate_lookup)
                mandate.save()
                self.debug(u'New Mandate found: %s' % mandate)

            # Keep only what follows the first "-" when the text has one.
            nature_text = entry.find('despesa').getText()
            pieces = nature_text.split('-', 1)
            if len(pieces) > 1:
                nature_text = pieces[1].strip()
            nature_text = nature_text.capitalize()

            nature, nature_created = ExpenseNature.objects.get_or_create(
                name=nature_text)
            nature_message = (u'New ExpenseNature found: %s' if nature_created
                              else u'Found existing ExpenseNature: %s')
            self.debug(nature_message % nature)

            reference_month = entry.find('mes').getText()
            reference_year = entry.find('ano').getText()
            date = parse_cmsp_date(reference_month, reference_year)

            supplier_name = entry.find('fornecedor').getText().capitalize()
            cnpj = self.normalize_cnpj_or_cpf(entry.find('cnpj').getText())

            # Nothing identifies this supplier; skip the entry.
            if not (cnpj or supplier_name):
                continue

            try:
                supplier = Supplier.objects.get(identifier=cnpj)
            except Supplier.DoesNotExist:
                supplier = Supplier(identifier=cnpj, name=supplier_name)
                supplier.save()
                self.debug(u'New Supplier found: %s' % supplier)
            else:
                self.debug(u'Found existing supplier: %s' % supplier)

            amount = float(entry.find('valor').getText())

            expense = ArchivedExpense(number='None',
                                      nature=nature,
                                      date=date,
                                      expensed=amount,
                                      mandate=mandate,
                                      supplier=supplier,
                                      collection_run=collection_run)
            expense.save()

            self.debug(u'New expense found: %s' % expense)