def persist_destination_data(
    page,
    table,
    page_size=30,
    source_schema='public',
    destination_schema='public',
    models_module='model',
):
    """Merge one page of ``table`` rows from the source into the destination.

    The model class is looked up in the module returned by
    ``get_models_module(models_module)`` under the name produced by
    ``guess_model_name(table)``. The source query is ordered by the model's
    primary-key columns, paginated, and each row is deep-copied and merged
    into the destination session, which is then flushed and committed.

    Args:
        page: Page number forwarded to ``paginate``.
        table: Table name; used to resolve the model and to qualify the
            primary-key columns in the ORDER BY clause.
        page_size: Number of rows requested per page.
        source_schema: Forwarded to ``get_source_session``.
        destination_schema: Forwarded to ``get_source_session``.
        models_module: Forwarded to ``get_models_module``.

    Raises:
        Whatever the query, merge, flush or commit raises; both sessions are
        closed before the exception propagates.
    """
    source_session = get_source_session(source_schema, destination_schema)
    destination_session = get_destination_session()
    try:
        models = get_models_module(models_module)
        model = getattr(models, guess_model_name(table))
        pk = [f"{table}.{col.name}" for col in inspect(model).primary_key]
        ordered_query = source_session.query(model).order_by(text(",".join(pk)))
        items = paginate(ordered_query, page, page_size).items
        # Copies are merged instead of the loaded rows themselves.
        sources = [deepcopy(row) for row in items]

        # One tick per row plus a final tick once the page is committed.
        pb = ProgressBar(total=page_size + 1, prefix=f'Page {page} Pid: {os.getpid()}')
        for source in sources:
            logger.info('merging data data %s', source)
            destination_session.merge(source)
            pb.next()

        destination_session.flush()
        destination_session.commit()
        destination_session.expunge_all()
    finally:
        # Release both sessions even when the page fails part-way through;
        # previously an exception skipped close() and the source session
        # was never closed at all.
        destination_session.close()
        source_session.close()

    pb.next()
    logger.info('persited page %d', page)
def fetch_despesas_deputados(self, anos, filepath='db.json.gz'):
    """Collect the expense records for every known id and save them.

    For each id returned by ``self.get_ids()`` the query is configured with
    ``anos`` and 100 items per page, every result page is appended to a
    temporary data file, and that file is loaded through ``self.to_pandas``,
    tagged with an ``idDeputado`` column and added to the temporary
    dataframe. Ids whose query reports zero pages are skipped.

    Args:
        anos: Value forwarded to ``self.set_anos`` before each query.
        filepath: Destination forwarded to ``self._save_temp_dataframe``.
    """
    deputy_ids = self.get_ids()
    overall_bar = ProgressBar(len(deputy_ids), prefix='Total Geral', suffix='', length=100)
    self._create_temp_dataframe()

    for deputy_id in deputy_ids:
        overall_bar.next()

        # The query settings are re-applied for every id.
        self.set_anos(anos)
        self.set_itens(100)
        page = self.busca_despesas(deputy_id)
        page_count = self.get_total_paginas()
        if page_count == 0:
            continue

        temp_path = self._create_temp_data_file(page_count)

        more_pages = True
        while more_pages:
            serialized = json.dumps(page['dados'])
            # Drop the first and last character of the serialized payload
            # (the enclosing brackets when 'dados' serializes as an array).
            self._add_data_record(serialized[1:-1])
            more_pages = self.has_next()
            if more_pages:
                page = self.next()

        frame = self.to_pandas(temp_path)
        frame['idDeputado'] = str(deputy_id)
        self._add_df_record(frame)
        self._clear_temp_data_file()

    self._save_temp_dataframe(filepath)
def fetch_dados_proposicoes(self, filepath='db.json'):
    """Download the data of every known id and convert it via pandas.

    Each record's ``'dados'`` payload is written as one element of a JSON
    array in a temporary file, which is then handed to
    ``self.to_pandas_json_file`` together with ``filepath``. The temporary
    file is removed afterwards, including when a fetch or the conversion
    fails.

    Args:
        filepath: Destination forwarded to ``self.to_pandas_json_file``.
    """
    d_ids = self.get_ids()
    total = len(d_ids)
    print('Obtendo dados de %i proposições' % total)
    pbar = ProgressBar(total - 1, prefix='Dados Proposições', suffix='obtidos')

    # delete=False: the file is reopened by name after it has been closed.
    json_file = tempfile.NamedTemporaryFile(mode='a', encoding='utf-8', delete=False)
    try:
        with json_file:
            json_file.write('[')
            for count, d_id in enumerate(d_ids):
                json_data = self.busca_por_id(d_id)
                json_file.write(json.dumps(json_data['dados']))
                pbar.next()
                if count != total - 1:
                    json_file.write(",\n")
                # Flush once every 50 records. The previous `if count % 50:`
                # was inverted and flushed on all the other iterations.
                if (count + 1) % 50 == 0:
                    json_file.flush()
            json_file.write(']')
        # Leaving the `with` block flushed and closed the file.

        print('%i dados de propisições obtidos' % total)
        print('Convertendo para pandas Dataframe')
        self.to_pandas_json_file(json_file.name, filepath)
    finally:
        # Always remove the temporary file so a failure does not leak it.
        os.remove(json_file.name)
def fetch_dados_deputados(self, filepath='db.json.gz'):
    """Download the data of every known id and convert it via pandas.

    Each record's ``'dados'`` payload is serialized to JSON and appended to
    the temporary data file, which is then handed to
    ``self.to_pandas_json_file`` together with ``filepath`` and cleared.

    Args:
        filepath: Destination forwarded to ``self.to_pandas_json_file``.
    """
    deputy_ids = self.get_ids()
    total = len(deputy_ids)
    print('Obtendo dados de %i deputados' % total)

    progress = ProgressBar(total - 1, prefix='Dados Deputados', suffix='obtidos')
    temp_path = self._create_temp_data_file(total)

    for deputy_id in deputy_ids:
        response = self.busca_por_id(deputy_id)
        record = json.dumps(response['dados'])
        self._add_data_record(record)
        progress.next()

    print('%i dados de deputados obtidos' % total)
    print('Convertendo para pandas Dataframe')
    self.to_pandas_json_file(temp_path, filepath)
    self._clear_temp_data_file()
def _get_ids(self):
    """Collect the ``'id'`` of every entry across all result pages.

    Requests 100 items per page, walks the pages with ``self.has_next()`` /
    ``self.next()``, stores the collected ids with ``self.save_ids`` and
    returns them.

    Returns:
        set: The ids found in the ``'dados'`` list of every page.
    """
    ids = set()
    self.set_itens(100)
    resp = self.busca_todos()
    total = self.get_total_paginas()
    pbar = ProgressBar(total - 1, prefix='ID Deputados', suffix='obtidos')

    while self.has_next():
        ids.update(dep['id'] for dep in resp['dados'])
        pbar.next()
        resp = self.next()

    # The loop only handles a page when another one follows it, so the last
    # fetched page (or the only page of a single-page result) is still
    # pending here. Without this step its ids were silently dropped.
    ids.update(dep['id'] for dep in resp['dados'])

    self.save_ids(ids)
    return ids
def migrate_data(
    c,
    tables,
    max_workers=30,
    page_size=30,
    source_schema='public',
    destination_schema='public',
):
    """Fan out the migration of each table, one page per submitted job.

    For every table the source row count is split into pages of
    ``page_size`` rows and ``lib.persist_destination_data`` is submitted to
    a process pool once per page, with 1-based page numbers.

    Args:
        c: Not used by this function.
        tables: Comma-separated table names.
        max_workers: Size of the process pool.
        page_size: Number of rows per page.
        source_schema: Forwarded to the session factory and to each job.
        destination_schema: Forwarded to the session factory and to each job.
    """
    source_session = lib.get_source_session(source_schema, destination_schema)
    with ProcessPoolExecutor(max_workers) as executor:
        for table in tables.split(','):
            model = getattr(lib.get_models_module(), lib.guess_model_name(table))
            row_count = source_session.query(model).count()
            # Ceiling division: the previous int(count / page_size) floored,
            # so the trailing partial page (or the only page, when the table
            # has fewer rows than page_size) was never submitted.
            pages = (row_count + page_size - 1) // page_size
            pb = ProgressBar(total=pages, prefix=f'Sending {pages} pages')
            for page in range(pages):
                logger.info('sending page %d', page + 1)
                executor.submit(
                    lib.persist_destination_data,
                    page + 1,
                    table,
                    page_size,
                    source_schema,
                    destination_schema,
                )
                pb.next()
    source_session.expunge_all()
def get_ids(self):
    """Return every id, from the ids file when present, else from the API.

    When ``self.has_ids_file()`` is true the result of ``self.load_ids()``
    is returned as is. Otherwise the query is restricted to a fixed list of
    type codes and a start date of 1984-01-01, paged at 100 items, and the
    ``'id'`` of every entry on every page is collected, stored with
    ``self.save_ids`` and returned.

    Returns:
        The loaded ids, or a set of the ids collected from all pages.
    """
    if self.has_ids_file():
        return self.load_ids()

    ids = set()
    self.set_siglas_tipo(['PEC', 'PLP', 'PL', 'PLV', 'PDC', 'MPV'])
    self.set_data_inicio('1984-01-01')
    self.set_itens(100)
    resp = self.busca_todas()
    total = self.get_total_paginas()
    pbar = ProgressBar(total - 1, prefix='ID Proposições', suffix='obtidos')

    while self.has_next():
        ids.update(prep['id'] for prep in resp['dados'])
        pbar.next()
        resp = self.next()

    # The loop only handles a page when another one follows it, so the last
    # fetched page (or the only page of a single-page result) is still
    # pending here. Without this step its ids were silently dropped.
    ids.update(prep['id'] for prep in resp['dados'])

    self.save_ids(ids)
    return ids
def test_next_method(self, pb):
    """Check that each ``next()`` call reports the incremented step.

    The ``pb`` argument is rebound immediately, so the value passed in is
    not used. After the n-th ``next()`` call, ``generate_pbar`` must have
    been called last with the bar itself and ``n``.
    """
    pb = ProgressBar(self.total, fill='X')
    for expected_step in range(1, 51):
        pb.next()
        pb.generate_pbar.assert_called_with(pb, expected_step)