Ejemplo n.º 1
0
    def test_import_from_xls_retrieve_desired_data(self, mocked_create_table):
        mocked_create_table.return_value = 42

        # import using filename
        rows.import_from_xls(self.filename)
        call_args = mocked_create_table.call_args_list[0]
        self.assert_create_table_data(
            call_args,
            expected_meta={
                "imported_from": "xls",
                "filename": self.filename,
                "sheet_name": "Sheet1",
            },
        )

        # import using fobj
        with open(self.filename, "rb") as fobj:
            rows.import_from_xls(fobj)
            call_args = mocked_create_table.call_args_list[1]
            self.assert_create_table_data(
                call_args,
                expected_meta={
                    "imported_from": "xls",
                    "filename": self.filename,
                    "sheet_name": "Sheet1",
                },
            )
Ejemplo n.º 2
0
    def test_import_from_xls_retrieve_desired_data(self, mocked_create_table):
        mocked_create_table.return_value = 42

        # import using filename
        table_1 = rows.import_from_xls(self.filename)
        call_args = mocked_create_table.call_args_list[0]
        self.assert_create_table_data(call_args)

        # import using fobj
        with open(self.filename, 'rb') as fobj:
            table_2 = rows.import_from_xls(fobj)
            call_args = mocked_create_table.call_args_list[1]
            self.assert_create_table_data(call_args)
Ejemplo n.º 3
0
 def test_start_and_end_row(self, mocked_create_table):
     rows.import_from_xls(self.filename,
                          start_row=6,
                          end_row=8,
                          start_column=6,
                          end_column=8)
     self.assertTrue(mocked_create_table.called)
     self.assertEqual(mocked_create_table.call_count, 1)
     call_args = mocked_create_table.call_args_list[0]
     expected_data = [
         ["12.0%", "2050-01-02", "2050-01-02T23:45:31"],
         ["13.64%", "2015-08-18", "2015-08-18T22:21:33"],
         ["13.14%", "2015-03-04", "2015-03-04T16:00:01"],
     ]
     self.assertEqual(expected_data, call_args[0][0])
Ejemplo n.º 4
0
def test_abre_doc():
    obj = d.Documento("document.xls")

    obj.abre_doc()

    assert isinstance(rows.import_from_xls(obj.doc_nome + '.' + obj.doc_tipo),
                      type(obj.doc))
Ejemplo n.º 5
0
    def test_import_from_xls_filename(self):
        table = rows.import_from_xls(self.filename)

        self.assert_table_equal(table, utils.table)

        expected_meta = {'imported_from': 'xls', 'filename': self.filename,}
        self.assertEqual(table.meta, expected_meta)
def convert_file(input_filename, output_filename):
    table = rows.import_from_xls(
        input_filename,
        sheet_name="Municípios",
        start_row=1,
        force_types={
            "cod_uf": rows.fields.TextField,
            "cod_munic": rows.fields.TextField,
            "populacao_estimada": CustomIntegerField,
        },
    )
    result = []
    for row in table:
        if not row.uf.strip() or "fonte:" in row.uf.strip().lower():
            # End of data
            break
        result.append(
            {
                "state": row.uf,
                "state_ibge_code": row.cod_uf,
                "city_ibge_code": f"{row.cod_uf}{row.cod_munic}",
                "city": row.nome_do_municipio.replace("*", "").strip(),
                "estimated_population": row.populacao_estimada,
            }
        )
    result.sort(key=lambda row: (row["state"], to_ascii(row["city"])))
    writer = rows.utils.CsvLazyDictWriter(output_filename)
    for row in result:
        writer.writerow(row)
Ejemplo n.º 7
0
    def parse_budget(self, filename, year, action):
        logging.info(f"[Budget-CE]   Parsing budget {filename}")
        # First, import the table and clean up not desired lines
        table = rows.import_from_xls(filename)
        result = []
        for row in table:
            if row.codigo.lower().strip() == "total geral":
                break
            result.append(row._asdict())

        # Then, import desired lines parsing some columns as Decimals
        table = rows.import_from_dicts(
            result,
            force_types={
                "lei": BRDecimalField,
                "lei_cred": BRDecimalField,
                "empenhado": BRDecimalField,
                "pago": BRDecimalField,
                "emp": BRDecimalField,
                "pago_2": BRDecimalField,
            },
        )
        result = []
        for row in table:
            row = row._asdict()
            action_code = row.pop("codigo")
            assert str(action_code) == str(action)
            row.update({
                "ano": year,
                "codigo_acao": action,
                "estado": "CE",
            })
            result.append(row)

        return result
Ejemplo n.º 8
0
    def test_export_to_xls_filename(self):
        # TODO: may test file contents
        temp = tempfile.NamedTemporaryFile(delete=False)
        self.files_to_delete.append(temp.name)
        rows.export_to_xls(utils.table, temp.name)

        table = rows.import_from_xls(temp.name)
        self.assert_table_equal(table, utils.table)
Ejemplo n.º 9
0
    def test_export_to_xls_filename(self):
        # TODO: may test file contents
        temp = tempfile.NamedTemporaryFile(delete=False)
        self.files_to_delete.append(temp.name)
        rows.export_to_xls(utils.table, temp.name)

        table = rows.import_from_xls(temp.name)
        self.assert_table_equal(table, utils.table)
 def test_start_and_end_row(self, mocked_create_table):
     rows.import_from_xls(
         self.filename,
         start_row=3,
         end_row=5,
         start_column=4,
         end_column=6,
     )
     self.assertTrue(mocked_create_table.called)
     self.assertEqual(mocked_create_table.call_count, 1)
     call_args = mocked_create_table.call_args_list[0]
     expected_data = [
         ['12.0%', '2050-01-02', '2050-01-02T23:45:31'],
         ['13.64%', '2015-08-18', '2015-08-18T22:21:33'],
         ['13.14%', '2015-03-04', '2015-03-04T16:00:01'],
     ]
     self.assertEqual(expected_data, call_args[0][0])
Ejemplo n.º 11
0
 def abre_doc(self):
     if self.doc_tipo == 'xls':
         self.doc = rows.import_from_xls(self.doc)
     elif self.doc_tipo == 'xlsx':
         self.doc = rows.import_from_xlsx(self.doc)
     elif self.doc_tipo == 'ods':
         self.doc = rows.import_from_ods(self.doc)
     else:
         raise Exception('Erro ao abrir o arquivo.')
Ejemplo n.º 12
0
    def test_import_from_xls_fobj(self):
        # TODO: may test with codecs.open passing an encoding
        with open(self.filename, 'rb') as fobj:
            table = rows.import_from_xls(fobj)

        self.assert_table_equal(table, utils.table)

        expected_meta = {'imported_from': 'xls', 'filename': self.filename,}
        self.assertEqual(table.meta, expected_meta)
Ejemplo n.º 13
0
    def test_export_to_xls_fobj(self):
        # TODO: may test with codecs.open passing an encoding
        # TODO: may test file contents
        temp = tempfile.NamedTemporaryFile(delete=False, mode='wb')
        self.files_to_delete.append(temp.name)
        rows.export_to_xls(utils.table, temp.file)
        temp.file.close()

        table = rows.import_from_xls(temp.name)
        self.assert_table_equal(table, utils.table)
Ejemplo n.º 14
0
    def test_import_from_xls_retrieve_desired_data(self, mocked_create_table):
        mocked_create_table.return_value = 42

        # import using filename
        table_1 = rows.import_from_xls(self.filename)
        call_args = mocked_create_table.call_args_list[0]
        self.assert_create_table_data(call_args,
                expected_meta={'imported_from': 'xls',
                               'filename': self.filename,
                               'sheet_name': 'Sheet1',})

        # import using fobj
        with open(self.filename, 'rb') as fobj:
            table_2 = rows.import_from_xls(fobj)
            call_args = mocked_create_table.call_args_list[1]
            self.assert_create_table_data(call_args,
                expected_meta={'imported_from': 'xls',
                               'filename': self.filename,
                               'sheet_name': 'Sheet1',})
Ejemplo n.º 15
0
    def test_import_from_xls_filename(self):
        table = rows.import_from_xls(self.filename)

        self.assert_table_equal(table, utils.table)

        expected_meta = {
            'imported_from': 'xls',
            'filename': self.filename,
        }
        self.assertEqual(table.meta, expected_meta)
Ejemplo n.º 16
0
    def test_export_to_xls_fobj(self):
        # TODO: may test with codecs.open passing an encoding
        # TODO: may test file contents
        temp = tempfile.NamedTemporaryFile(delete=False, mode='wb')
        self.files_to_delete.append(temp.name)
        rows.export_to_xls(utils.table, temp.file)
        temp.file.close()

        table = rows.import_from_xls(temp.name)
        self.assert_table_equal(table, utils.table)
Ejemplo n.º 17
0
    def read_data(self, *args, **kwargs):
        if kwargs["sheet_name"] is None:
            return []
        else:
            kwargs["skip_header"] = False

            if "end_column" not in kwargs:
                # Avoid reading all columns (even if blank)
                kwargs["end_column"] = 50

        return rows.import_from_xls(self.filename, *args, **kwargs)
Ejemplo n.º 18
0
    def test_import_from_xls_uses_create_table(self, mocked_create_table):
        mocked_create_table.return_value = 42
        kwargs = {'encoding': 'test', 'some_key': 123, 'other': 456, }
        result = rows.import_from_xls(self.filename, **kwargs)
        self.assertTrue(mocked_create_table.called)
        self.assertEqual(mocked_create_table.call_count, 1)
        self.assertEqual(result, 42)

        call = mocked_create_table.call_args
        kwargs['meta'] = {'imported_from': 'xls', 'filename': self.filename, }
        self.assertEqual(call[1], kwargs)
Ejemplo n.º 19
0
    def test_issue_168(self):
        temp = tempfile.NamedTemporaryFile(delete=False)
        filename = '{}.{}'.format(temp.name, self.file_extension)
        self.files_to_delete.append(filename)

        table = rows.Table(fields=
                OrderedDict([('jsoncolumn', rows.fields.JSONField)]))
        table.append({'jsoncolumn': '{"python": 42}'})
        rows.export_to_xls(table, filename)

        table2 = rows.import_from_xls(filename)
        self.assert_table_equal(table, table2)
Ejemplo n.º 20
0
    def test_import_from_xls_fobj(self):
        # TODO: may test with codecs.open passing an encoding
        with open(self.filename, 'rb') as fobj:
            table = rows.import_from_xls(fobj)

        self.assert_table_equal(table, utils.table)

        expected_meta = {
            'imported_from': 'xls',
            'filename': self.filename,
        }
        self.assertEqual(table.meta, expected_meta)
    def test_issue_168(self):
        temp = tempfile.NamedTemporaryFile(delete=False)
        filename = '{}.{}'.format(temp.name, self.file_extension)
        self.files_to_delete.append(filename)

        table = rows.Table(fields=OrderedDict([('jsoncolumn',
                                                rows.fields.JSONField)]))
        table.append({'jsoncolumn': '{"python": 42}'})
        rows.export_to_xls(table, filename)

        table2 = rows.import_from_xls(filename)
        self.assert_table_equal(table, table2)
Ejemplo n.º 22
0
def import_xls(f_obj):
    content = f_obj.read()
    f_obj.seek(0)

    temp_xls = NamedTemporaryFile(suffix='.xls', delete=False)
    temp_xls.write(content)
    temp_xls.close()

    data = rows.import_from_xls(temp_xls)
    temp_file = Path(temp_xls.name)
    os.remove(temp_file)

    return data
    def test_export_to_xls_filename(self):
        # TODO: may test file contents
        temp = tempfile.NamedTemporaryFile(delete=False)
        self.files_to_delete.append(temp.name)
        rows.export_to_xls(utils.table, temp.name)

        table = rows.import_from_xls(temp.name)
        self.assert_table_equal(table, utils.table)

        temp.file.seek(0)
        result = temp.file.read()
        export_in_memory = rows.export_to_xls(utils.table, None)
        self.assertEqual(result, export_in_memory)
Ejemplo n.º 24
0
    def test_export_to_xls_filename(self):
        # TODO: may test file contents
        temp = tempfile.NamedTemporaryFile(delete=False)
        self.files_to_delete.append(temp.name)
        rows.export_to_xls(utils.table, temp.name)

        table = rows.import_from_xls(temp.name)
        self.assert_table_equal(table, utils.table)

        temp.file.seek(0)
        result = temp.file.read()
        export_in_memory = rows.export_to_xls(utils.table, None)
        self.assertEqual(result, export_in_memory)
Ejemplo n.º 25
0
def import_xls(f_obj):
    content = f_obj.read()
    f_obj.seek(0)

    temp_xls = NamedTemporaryFile(suffix=".xls", delete=False)
    temp_xls.write(content)
    temp_xls.close()

    temp_file = Path(temp_xls.name)
    with open(temp_file, "rb") as temp_xls:
        data = rows.import_from_xls(temp_xls)

    os.remove(temp_file)
    return data
    def test_import_from_xls_retrieve_desired_data(self, mocked_create_table):
        mocked_create_table.return_value = 42

        # import using filename
        rows.import_from_xls(self.filename)
        call_args = mocked_create_table.call_args_list[0]
        self.assert_create_table_data(call_args,
                                      expected_meta={
                                          'imported_from': 'xls',
                                          'filename': self.filename,
                                          'sheet_name': 'Sheet1',
                                      })

        # import using fobj
        with open(self.filename, 'rb') as fobj:
            rows.import_from_xls(fobj)
            call_args = mocked_create_table.call_args_list[1]
            self.assert_create_table_data(call_args,
                                          expected_meta={
                                              'imported_from': 'xls',
                                              'filename': self.filename,
                                              'sheet_name': 'Sheet1',
                                          })
Ejemplo n.º 27
0
    def test_import_from_xls_uses_create_table(self, mocked_create_table):
        mocked_create_table.return_value = 42
        kwargs = {"some_key": 123, "other": 456}
        result = rows.import_from_xls(self.filename, **kwargs)
        self.assertTrue(mocked_create_table.called)
        self.assertEqual(mocked_create_table.call_count, 1)
        self.assertEqual(result, 42)

        call = mocked_create_table.call_args
        kwargs["meta"] = {
            "imported_from": "xls",
            "filename": self.filename,
            "sheet_name": "Sheet1",
        }
        self.assertEqual(call[1], kwargs)
Ejemplo n.º 28
0
    def extract(self):
        filename, metadata = self.filename, self.metadata

        # From 2017-11 to 2018-12, get data from Brasil.IO (in CSV) using the
        # `extract_magistrados` helper function.
        if "contracheque.csv" in filename.name:
            yield from extract_magistrados(filename, self.state)

        else:
            # TODO: check repeated months

            if metadata["ano"] == 2018 or (metadata["ano"] == 2017
                                           and metadata["mes"] in (11, 12)):
                # Data already converted in contracheque.csv
                return

            extension = filename.name.split(".")[-1].lower()
            if extension == "xls":
                table = rows.import_from_xls(filename, start_row=3)
                for row in table:
                    yield {
                        "cargo": row.cargo_no_orgao,
                        "nome": row.nome,
                        "rendimento_bruto": row.total_decreditos_v,
                        "rendimento_liquido": row.rendimentoliquido_xi,
                    }

            elif extension == "ods":
                fields_1 = OrderedDict([
                    ("nome", rows.fields.TextField),
                    ("lotacao", rows.fields.TextField),
                    ("cargo_no_orgao", rows.fields.TextField),
                    ("remuneracao_paradigma", MoneyField),
                    ("vantagens_pessoais", MoneyField),
                    ("subsidio_diferenca_de_subsidio_funcao_de_confianca_ou_cargo_em_comissao",
                     MoneyField),
                    ("auxilios_indenizacoes", MoneyField),
                    ("auxilios_indenizacoes_pagto_tesouraria", MoneyField),
                    ("vantagens_eventuais", MoneyField),
                    ("decimo_terceiro_adiantamento2a_parcela", MoneyField),
                    ("terco_constitucional_de_ferias", MoneyField),
                    ("pensao_provisoria_de_montepio", MoneyField),
                    ("total_de_creditos", MoneyField),
                    ("previdencia_publica", MoneyField),
                    ("imposto_de_renda", MoneyField),
                    ("descontos_diversos", MoneyField),
                    ("retencao_por_teto_constitucional", MoneyField),
                    ("total_de_descontos_debitos", MoneyField),
                    ("rendimento_liquido", MoneyField),
                    ("remuneracao_do_orgao_de_origem", MoneyField),
                    ("diarias", MoneyField),
                ])
                fields_2 = OrderedDict([
                    ("matricula", rows.fields.TextField),
                    ("nome", rows.fields.TextField),
                    ("lotacao", rows.fields.TextField),
                    ("cargo_no_orgao", rows.fields.TextField),
                    ("remuneracao_paradigma", MoneyField),
                    ("vantagens_pessoais", MoneyField),
                    ("subsidio_diferenca_de_subsidio_funcao_de_confianca_ou_cargo_em_comissao",
                     MoneyField),
                    ("auxilios_indenizacoes", MoneyField),
                    ("auxilios_indenizacoes_pagto_tesouraria", MoneyField),
                    ("vantagens_eventuais", MoneyField),
                    ("decimo_terceiro_adiantamento2a_parcela", MoneyField),
                    ("terco_constitucional_de_ferias", MoneyField),
                    ("pensao_provisoria_de_montepio", MoneyField),
                    ("total_de_creditos", MoneyField),
                    ("previdencia_publica", MoneyField),
                    ("imposto_de_renda", MoneyField),
                    ("descontos_diversos", MoneyField),
                    ("retencao_por_teto_constitucional", MoneyField),
                    ("total_de_descontos_debitos", MoneyField),
                    ("rendimento_liquido", MoneyField),
                    ("remuneracao_do_orgao_de_origem", MoneyField),
                    ("diarias", MoneyField),
                ])
                import decimal
                try:
                    table = rows.import_from_ods(filename,
                                                 start_row=3,
                                                 end_column=20,
                                                 skip_header=True,
                                                 fields=fields_1)
                except (ValueError,
                        decimal.InvalidOperation):  # "Matricula" is hidden
                    return  # TODO: fix this case
                else:
                    for row in table:
                        yield {
                            "nome": row.nome,
                            "cargo": row.cargo_no_orgao,
                            "rendimento_liquido": row.rendimento_liquido,
                            "rendimento_bruto": row.total_de_creditos,
                        }

            elif extension == "pdf":
                return
                    legal_representant
                )
        else:
            docs_from_contract[doc.name] = {
                "documents": [str(doc.document).strip(".0")],
                "legal_representants": [
                    {
                        "name": doc.legal_representant,
                        "document": str(doc.legal_representant_document).strip(".0"),
                    }
                ],
            }
    return docs_from_contract


raw_contracts = rows.import_from_xls("data/contracts/city-hall-contracts-2016-2017.xls")
raw_docs_from_contract = rows.import_from_csv(
    "data/contracts/documents-from-contracts.csv"
)

documents = set(raw_contracts["cpfcnpj"])
docs_from_contract = consolidate_entities(raw_docs_from_contract)


invalid_document = []
contracts = []
for contract in raw_contracts:
    contract_info = {
        "code": contract.no_contrato,
        "description": contract.objeto,
        "start_date": contract.inicio_do_contrato,