def test_import_from_xls_retrieve_desired_data(self, mocked_create_table): mocked_create_table.return_value = 42 # import using filename rows.import_from_xls(self.filename) call_args = mocked_create_table.call_args_list[0] self.assert_create_table_data( call_args, expected_meta={ "imported_from": "xls", "filename": self.filename, "sheet_name": "Sheet1", }, ) # import using fobj with open(self.filename, "rb") as fobj: rows.import_from_xls(fobj) call_args = mocked_create_table.call_args_list[1] self.assert_create_table_data( call_args, expected_meta={ "imported_from": "xls", "filename": self.filename, "sheet_name": "Sheet1", }, )
def test_import_from_xls_retrieve_desired_data(self, mocked_create_table): mocked_create_table.return_value = 42 # import using filename table_1 = rows.import_from_xls(self.filename) call_args = mocked_create_table.call_args_list[0] self.assert_create_table_data(call_args) # import using fobj with open(self.filename, 'rb') as fobj: table_2 = rows.import_from_xls(fobj) call_args = mocked_create_table.call_args_list[1] self.assert_create_table_data(call_args)
def test_start_and_end_row(self, mocked_create_table): rows.import_from_xls(self.filename, start_row=6, end_row=8, start_column=6, end_column=8) self.assertTrue(mocked_create_table.called) self.assertEqual(mocked_create_table.call_count, 1) call_args = mocked_create_table.call_args_list[0] expected_data = [ ["12.0%", "2050-01-02", "2050-01-02T23:45:31"], ["13.64%", "2015-08-18", "2015-08-18T22:21:33"], ["13.14%", "2015-03-04", "2015-03-04T16:00:01"], ] self.assertEqual(expected_data, call_args[0][0])
def test_abre_doc(): obj = d.Documento("document.xls") obj.abre_doc() assert isinstance(rows.import_from_xls(obj.doc_nome + '.' + obj.doc_tipo), type(obj.doc))
def test_import_from_xls_filename(self): table = rows.import_from_xls(self.filename) self.assert_table_equal(table, utils.table) expected_meta = {'imported_from': 'xls', 'filename': self.filename,} self.assertEqual(table.meta, expected_meta)
def convert_file(input_filename, output_filename): table = rows.import_from_xls( input_filename, sheet_name="Municípios", start_row=1, force_types={ "cod_uf": rows.fields.TextField, "cod_munic": rows.fields.TextField, "populacao_estimada": CustomIntegerField, }, ) result = [] for row in table: if not row.uf.strip() or "fonte:" in row.uf.strip().lower(): # End of data break result.append( { "state": row.uf, "state_ibge_code": row.cod_uf, "city_ibge_code": f"{row.cod_uf}{row.cod_munic}", "city": row.nome_do_municipio.replace("*", "").strip(), "estimated_population": row.populacao_estimada, } ) result.sort(key=lambda row: (row["state"], to_ascii(row["city"]))) writer = rows.utils.CsvLazyDictWriter(output_filename) for row in result: writer.writerow(row)
def parse_budget(self, filename, year, action): logging.info(f"[Budget-CE] Parsing budget {filename}") # First, import the table and clean up not desired lines table = rows.import_from_xls(filename) result = [] for row in table: if row.codigo.lower().strip() == "total geral": break result.append(row._asdict()) # Then, import desired lines parsing some columns as Decimals table = rows.import_from_dicts( result, force_types={ "lei": BRDecimalField, "lei_cred": BRDecimalField, "empenhado": BRDecimalField, "pago": BRDecimalField, "emp": BRDecimalField, "pago_2": BRDecimalField, }, ) result = [] for row in table: row = row._asdict() action_code = row.pop("codigo") assert str(action_code) == str(action) row.update({ "ano": year, "codigo_acao": action, "estado": "CE", }) result.append(row) return result
def test_export_to_xls_filename(self): # TODO: may test file contents temp = tempfile.NamedTemporaryFile(delete=False) self.files_to_delete.append(temp.name) rows.export_to_xls(utils.table, temp.name) table = rows.import_from_xls(temp.name) self.assert_table_equal(table, utils.table)
def test_start_and_end_row(self, mocked_create_table): rows.import_from_xls( self.filename, start_row=3, end_row=5, start_column=4, end_column=6, ) self.assertTrue(mocked_create_table.called) self.assertEqual(mocked_create_table.call_count, 1) call_args = mocked_create_table.call_args_list[0] expected_data = [ ['12.0%', '2050-01-02', '2050-01-02T23:45:31'], ['13.64%', '2015-08-18', '2015-08-18T22:21:33'], ['13.14%', '2015-03-04', '2015-03-04T16:00:01'], ] self.assertEqual(expected_data, call_args[0][0])
def abre_doc(self): if self.doc_tipo == 'xls': self.doc = rows.import_from_xls(self.doc) elif self.doc_tipo == 'xlsx': self.doc = rows.import_from_xlsx(self.doc) elif self.doc_tipo == 'ods': self.doc = rows.import_from_ods(self.doc) else: raise Exception('Erro ao abrir o arquivo.')
def test_import_from_xls_fobj(self): # TODO: may test with codecs.open passing an encoding with open(self.filename, 'rb') as fobj: table = rows.import_from_xls(fobj) self.assert_table_equal(table, utils.table) expected_meta = {'imported_from': 'xls', 'filename': self.filename,} self.assertEqual(table.meta, expected_meta)
def test_export_to_xls_fobj(self): # TODO: may test with codecs.open passing an encoding # TODO: may test file contents temp = tempfile.NamedTemporaryFile(delete=False, mode='wb') self.files_to_delete.append(temp.name) rows.export_to_xls(utils.table, temp.file) temp.file.close() table = rows.import_from_xls(temp.name) self.assert_table_equal(table, utils.table)
def test_import_from_xls_retrieve_desired_data(self, mocked_create_table): mocked_create_table.return_value = 42 # import using filename table_1 = rows.import_from_xls(self.filename) call_args = mocked_create_table.call_args_list[0] self.assert_create_table_data(call_args, expected_meta={'imported_from': 'xls', 'filename': self.filename, 'sheet_name': 'Sheet1',}) # import using fobj with open(self.filename, 'rb') as fobj: table_2 = rows.import_from_xls(fobj) call_args = mocked_create_table.call_args_list[1] self.assert_create_table_data(call_args, expected_meta={'imported_from': 'xls', 'filename': self.filename, 'sheet_name': 'Sheet1',})
def test_import_from_xls_filename(self): table = rows.import_from_xls(self.filename) self.assert_table_equal(table, utils.table) expected_meta = { 'imported_from': 'xls', 'filename': self.filename, } self.assertEqual(table.meta, expected_meta)
def read_data(self, *args, **kwargs): if kwargs["sheet_name"] is None: return [] else: kwargs["skip_header"] = False if "end_column" not in kwargs: # Avoid reading all columns (even if blank) kwargs["end_column"] = 50 return rows.import_from_xls(self.filename, *args, **kwargs)
def test_import_from_xls_uses_create_table(self, mocked_create_table): mocked_create_table.return_value = 42 kwargs = {'encoding': 'test', 'some_key': 123, 'other': 456, } result = rows.import_from_xls(self.filename, **kwargs) self.assertTrue(mocked_create_table.called) self.assertEqual(mocked_create_table.call_count, 1) self.assertEqual(result, 42) call = mocked_create_table.call_args kwargs['meta'] = {'imported_from': 'xls', 'filename': self.filename, } self.assertEqual(call[1], kwargs)
def test_issue_168(self): temp = tempfile.NamedTemporaryFile(delete=False) filename = '{}.{}'.format(temp.name, self.file_extension) self.files_to_delete.append(filename) table = rows.Table(fields= OrderedDict([('jsoncolumn', rows.fields.JSONField)])) table.append({'jsoncolumn': '{"python": 42}'}) rows.export_to_xls(table, filename) table2 = rows.import_from_xls(filename) self.assert_table_equal(table, table2)
def test_import_from_xls_fobj(self): # TODO: may test with codecs.open passing an encoding with open(self.filename, 'rb') as fobj: table = rows.import_from_xls(fobj) self.assert_table_equal(table, utils.table) expected_meta = { 'imported_from': 'xls', 'filename': self.filename, } self.assertEqual(table.meta, expected_meta)
def test_issue_168(self): temp = tempfile.NamedTemporaryFile(delete=False) filename = '{}.{}'.format(temp.name, self.file_extension) self.files_to_delete.append(filename) table = rows.Table(fields=OrderedDict([('jsoncolumn', rows.fields.JSONField)])) table.append({'jsoncolumn': '{"python": 42}'}) rows.export_to_xls(table, filename) table2 = rows.import_from_xls(filename) self.assert_table_equal(table, table2)
def import_xls(f_obj): content = f_obj.read() f_obj.seek(0) temp_xls = NamedTemporaryFile(suffix='.xls', delete=False) temp_xls.write(content) temp_xls.close() data = rows.import_from_xls(temp_xls) temp_file = Path(temp_xls.name) os.remove(temp_file) return data
def test_export_to_xls_filename(self): # TODO: may test file contents temp = tempfile.NamedTemporaryFile(delete=False) self.files_to_delete.append(temp.name) rows.export_to_xls(utils.table, temp.name) table = rows.import_from_xls(temp.name) self.assert_table_equal(table, utils.table) temp.file.seek(0) result = temp.file.read() export_in_memory = rows.export_to_xls(utils.table, None) self.assertEqual(result, export_in_memory)
def import_xls(f_obj): content = f_obj.read() f_obj.seek(0) temp_xls = NamedTemporaryFile(suffix=".xls", delete=False) temp_xls.write(content) temp_xls.close() temp_file = Path(temp_xls.name) with open(temp_file, "rb") as temp_xls: data = rows.import_from_xls(temp_xls) os.remove(temp_file) return data
def test_import_from_xls_retrieve_desired_data(self, mocked_create_table): mocked_create_table.return_value = 42 # import using filename rows.import_from_xls(self.filename) call_args = mocked_create_table.call_args_list[0] self.assert_create_table_data(call_args, expected_meta={ 'imported_from': 'xls', 'filename': self.filename, 'sheet_name': 'Sheet1', }) # import using fobj with open(self.filename, 'rb') as fobj: rows.import_from_xls(fobj) call_args = mocked_create_table.call_args_list[1] self.assert_create_table_data(call_args, expected_meta={ 'imported_from': 'xls', 'filename': self.filename, 'sheet_name': 'Sheet1', })
def test_import_from_xls_uses_create_table(self, mocked_create_table): mocked_create_table.return_value = 42 kwargs = {"some_key": 123, "other": 456} result = rows.import_from_xls(self.filename, **kwargs) self.assertTrue(mocked_create_table.called) self.assertEqual(mocked_create_table.call_count, 1) self.assertEqual(result, 42) call = mocked_create_table.call_args kwargs["meta"] = { "imported_from": "xls", "filename": self.filename, "sheet_name": "Sheet1", } self.assertEqual(call[1], kwargs)
def extract(self): filename, metadata = self.filename, self.metadata # From 2017-11 to 2018-12, get data from Brasil.IO (in CSV) using the # `extract_magistrados` helper function. if "contracheque.csv" in filename.name: yield from extract_magistrados(filename, self.state) else: # TODO: check repeated months if metadata["ano"] == 2018 or (metadata["ano"] == 2017 and metadata["mes"] in (11, 12)): # Data already converted in contracheque.csv return extension = filename.name.split(".")[-1].lower() if extension == "xls": table = rows.import_from_xls(filename, start_row=3) for row in table: yield { "cargo": row.cargo_no_orgao, "nome": row.nome, "rendimento_bruto": row.total_decreditos_v, "rendimento_liquido": row.rendimentoliquido_xi, } elif extension == "ods": fields_1 = OrderedDict([ ("nome", rows.fields.TextField), ("lotacao", rows.fields.TextField), ("cargo_no_orgao", rows.fields.TextField), ("remuneracao_paradigma", MoneyField), ("vantagens_pessoais", MoneyField), ("subsidio_diferenca_de_subsidio_funcao_de_confianca_ou_cargo_em_comissao", MoneyField), ("auxilios_indenizacoes", MoneyField), ("auxilios_indenizacoes_pagto_tesouraria", MoneyField), ("vantagens_eventuais", MoneyField), ("decimo_terceiro_adiantamento2a_parcela", MoneyField), ("terco_constitucional_de_ferias", MoneyField), ("pensao_provisoria_de_montepio", MoneyField), ("total_de_creditos", MoneyField), ("previdencia_publica", MoneyField), ("imposto_de_renda", MoneyField), ("descontos_diversos", MoneyField), ("retencao_por_teto_constitucional", MoneyField), ("total_de_descontos_debitos", MoneyField), ("rendimento_liquido", MoneyField), ("remuneracao_do_orgao_de_origem", MoneyField), ("diarias", MoneyField), ]) fields_2 = OrderedDict([ ("matricula", rows.fields.TextField), ("nome", rows.fields.TextField), ("lotacao", rows.fields.TextField), ("cargo_no_orgao", rows.fields.TextField), ("remuneracao_paradigma", MoneyField), ("vantagens_pessoais", MoneyField), ("subsidio_diferenca_de_subsidio_funcao_de_confianca_ou_cargo_em_comissao", MoneyField), ("auxilios_indenizacoes", MoneyField), ("auxilios_indenizacoes_pagto_tesouraria", MoneyField), ("vantagens_eventuais", MoneyField), ("decimo_terceiro_adiantamento2a_parcela", MoneyField), ("terco_constitucional_de_ferias", MoneyField), ("pensao_provisoria_de_montepio", MoneyField), ("total_de_creditos", MoneyField), ("previdencia_publica", MoneyField), ("imposto_de_renda", MoneyField), ("descontos_diversos", MoneyField), ("retencao_por_teto_constitucional", MoneyField), ("total_de_descontos_debitos", MoneyField), ("rendimento_liquido", MoneyField), ("remuneracao_do_orgao_de_origem", MoneyField), ("diarias", MoneyField), ]) import decimal try: table = rows.import_from_ods(filename, start_row=3, end_column=20, skip_header=True, fields=fields_1) except (ValueError, decimal.InvalidOperation): # "Matricula" is hidden return # TODO: fix this case else: for row in table: yield { "nome": row.nome, "cargo": row.cargo_no_orgao, "rendimento_liquido": row.rendimento_liquido, "rendimento_bruto": row.total_de_creditos, } elif extension == "pdf": return
legal_representant ) else: docs_from_contract[doc.name] = { "documents": [str(doc.document).strip(".0")], "legal_representants": [ { "name": doc.legal_representant, "document": str(doc.legal_representant_document).strip(".0"), } ], } return docs_from_contract raw_contracts = rows.import_from_xls("data/contracts/city-hall-contracts-2016-2017.xls") raw_docs_from_contract = rows.import_from_csv( "data/contracts/documents-from-contracts.csv" ) documents = set(raw_contracts["cpfcnpj"]) docs_from_contract = consolidate_entities(raw_docs_from_contract) invalid_document = [] contracts = [] for contract in raw_contracts: contract_info = { "code": contract.no_contrato, "description": contract.objeto, "start_date": contract.inicio_do_contrato,