def persist_destination_data(page, table, page_size=30, source_schema='public', destination_schema='public',
                             models_module='model'):
    """Copy one page of rows for ``table`` from the source session to the destination session.

    The model class is looked up on the module returned by
    ``get_models_module(models_module)`` under the name produced by
    ``guess_model_name(table)``. Rows are read ordered by the model's
    primary-key columns, deep-copied, merged into the destination session
    one by one, and committed at the end of the page.

    Args:
        page: Page number forwarded to ``paginate``.
        table: Table name; used to pick the model and to qualify the
            primary-key columns in the ORDER BY clause.
        page_size: Number of rows per page forwarded to ``paginate``.
        source_schema: Forwarded to ``get_source_session``.
        destination_schema: Forwarded to ``get_source_session``.
        models_module: Forwarded to ``get_models_module``.
    """
    source_session = get_source_session(source_schema, destination_schema)
    destination_session = get_destination_session()

    try:
        models = get_models_module(models_module)
        model = getattr(models, guess_model_name(table))

        # Order by the primary key so every page sees a stable row ordering.
        pk = [f"{table}.{col.name}" for col in inspect(model).primary_key]
        query = source_session.query(model).order_by(text(",".join(pk)))
        items = paginate(query, page, page_size).items
        # NOTE(review): presumably copied so the rows are detached from the
        # source session before being merged elsewhere -- confirm.
        sources = [deepcopy(row) for row in items]

        # One step per row plus one for the commit. Sizing from the rows
        # actually read lets the bar complete on a short (last) page too.
        pb = ProgressBar(total=len(sources) + 1, prefix=f'Page {page} Pid: {os.getpid()}')

        for source in sources:
            logger.info('merging data data %s', source)
            destination_session.merge(source)
            pb.next()

        destination_session.flush()
        destination_session.commit()
        destination_session.expunge_all()
        pb.next()
        logger.info('persited page %d', page)
    finally:
        # Release both sessions even when a merge/flush/commit raises.
        destination_session.close()
        source_session.close()
Example #2
0
    def fetch_despesas_deputados(self, anos, filepath='db.json.gz'):
        """Fetch every expense page for each id from ``get_ids`` and save the result.

        For each id the year filter and page size (100) are set, the first
        page is requested with ``busca_despesas`` and the remaining pages are
        walked with ``has_next`` / ``next``. Each page's ``'dados'`` payload is
        appended to a temporary data file, which is then loaded through
        ``to_pandas``, tagged with an ``idDeputado`` column and added to the
        temporary dataframe. Ids reporting zero pages are skipped.

        Args:
            anos: Value forwarded to ``set_anos`` for every id.
            filepath: Forwarded to ``_save_temp_dataframe`` once all ids are done.
        """
        deputy_ids = self.get_ids()
        overall_bar = ProgressBar(len(deputy_ids), prefix='Total Geral', suffix='', length=100)
        self._create_temp_dataframe()

        for deputy_id in deputy_ids:
            overall_bar.next()
            self.set_anos(anos)
            self.set_itens(100)
            page = self.busca_despesas(deputy_id)
            page_count = self.get_total_paginas()
            if page_count == 0:
                continue

            temp_path = self._create_temp_data_file(page_count)

            more_pages = True
            while more_pages:
                serialized = json.dumps(page['dados'])
                # Drop the first and last character of the serialized payload
                # (presumably the enclosing list brackets) before recording it.
                self._add_data_record(serialized[1:-1])
                more_pages = self.has_next()
                if more_pages:
                    page = self.next()

            frame = self.to_pandas(temp_path)
            frame['idDeputado'] = str(deputy_id)
            self._add_df_record(frame)
            self._clear_temp_data_file()

        self._save_temp_dataframe(filepath)
Example #3
0
    def fetch_dados_proposicoes(self, filepath='db.json'):
        """Fetch the data of every id from ``get_ids`` and convert it via ``to_pandas_json_file``.

        Each record's ``'dados'`` payload is serialized into a temporary file
        as one JSON array (records separated by ``",\\n"``). The temporary
        file is then handed to ``to_pandas_json_file`` together with
        ``filepath`` and is always removed afterwards, even when a fetch or
        the conversion fails.

        Args:
            filepath: Forwarded to ``to_pandas_json_file`` as its second argument.
        """
        d_ids = self.get_ids()
        total = len(d_ids)
        print('Obtendo dados de %i proposições' % total)
        pbar = ProgressBar(total - 1,
                           prefix='Dados Proposições',
                           suffix='obtidos')
        # delete=False: the file must outlive close() so it can be re-read by
        # name below; it is removed explicitly in the ``finally`` clause.
        json_file = tempfile.NamedTemporaryFile(mode='a',
                                                encoding='utf-8',
                                                delete=False)
        try:
            with json_file:
                json_file.write('[')
                for count, d_id in enumerate(d_ids):
                    json_data = self.busca_por_id(d_id)
                    if count:
                        # Separator goes between records, never after the last.
                        json_file.write(",\n")
                    json_file.write(json.dumps(json_data['dados']))
                    pbar.next()

                    # Flush once every 50 records rather than on nearly every
                    # record (the original condition was inverted).
                    if (count + 1) % 50 == 0:
                        json_file.flush()

                json_file.write(']')

            print('%i dados de propisições obtidos' % total)
            print('Convertendo para pandas Dataframe')
            self.to_pandas_json_file(json_file.name, filepath)
        finally:
            os.remove(json_file.name)
Example #4
0
    def fetch_dados_deputados(self, filepath='db.json.gz'):
        """Fetch the data of every id from ``get_ids`` and convert it via ``to_pandas_json_file``.

        Each record's ``'dados'`` payload is serialized with ``json.dumps`` and
        appended through ``_add_data_record`` to the temporary data file
        created by ``_create_temp_data_file``. That file is then passed to
        ``to_pandas_json_file`` along with ``filepath`` and cleared.

        Args:
            filepath: Forwarded to ``to_pandas_json_file`` as its second argument.
        """
        deputy_ids = self.get_ids()
        n_deputies = len(deputy_ids)
        print('Obtendo dados de %i deputados' % n_deputies)

        progress = ProgressBar(n_deputies - 1, prefix='Dados Deputados', suffix='obtidos')
        temp_path = self._create_temp_data_file(n_deputies)

        for deputy_id in deputy_ids:
            payload = self.busca_por_id(deputy_id)['dados']
            self._add_data_record(json.dumps(payload))
            progress.next()

        print('%i dados de deputados obtidos' % n_deputies)
        print('Convertendo para pandas Dataframe')
        self.to_pandas_json_file(temp_path, filepath)
        self._clear_temp_data_file()
Example #5
0
    def _get_ids(self):
        """Collect the ``'id'`` of every entry across all result pages, save and return them.

        The page size is set to 100, the first page is requested with
        ``busca_todos`` and following pages with ``next``. The collected set
        is passed to ``save_ids`` before being returned.

        Returns:
            set: The ids found in the ``'dados'`` list of every fetched page.
        """
        ids = set()
        self.set_itens(100)
        resp = self.busca_todos()
        total = self.get_total_paginas()
        pbar = ProgressBar(total - 1, prefix='ID Deputados', suffix='obtidos')

        # Consume the current page BEFORE asking whether another one follows.
        # Checking first (as the previous loop did) skipped the last fetched
        # page and returned nothing at all for single-page results.
        # NOTE(review): assumes has_next() reports whether a page follows the
        # most recently fetched one -- confirm.
        while True:
            ids.update(dep['id'] for dep in resp['dados'])
            pbar.next()
            if not self.has_next():
                break
            resp = self.next()

        self.save_ids(ids)
        return ids
def migrate_data(c,
                 tables,
                 max_workers=30,
                 page_size=30,
                 source_schema='public',
                 destination_schema='public'):
    """Submit one ``lib.persist_destination_data`` job per page of every listed table.

    For each table the row count is read from the source session, split into
    pages of ``page_size`` rows, and each page (numbered from 1) is submitted
    to a process pool. Pages whose worker raised are reported through
    ``logger.error`` once all pages have been submitted.

    Args:
        c: Unused by this function.
        tables: Comma-separated table names.
        max_workers: Size of the process pool.
        page_size: Rows per page; also forwarded to each worker.
        source_schema: Forwarded to ``lib.get_source_session`` and each worker.
        destination_schema: Forwarded to ``lib.get_source_session`` and each worker.
    """
    source_session = lib.get_source_session(source_schema, destination_schema)
    submitted = []

    with ProcessPoolExecutor(max_workers) as executor:
        for table in tables.split(','):
            model = getattr(lib.get_models_module(),
                            lib.guess_model_name(table))
            row_count = source_session.query(model).count()
            # Ceiling division: flooring dropped the trailing partial page, so
            # up to page_size - 1 rows per table were never migrated.
            pages = (row_count + page_size - 1) // page_size
            pb = ProgressBar(total=pages, prefix=f'Sending {pages} pages')

            for page in range(1, pages + 1):
                logger.info('sending page %d', page)
                future = executor.submit(lib.persist_destination_data, page, table,
                                         page_size, source_schema, destination_schema)
                submitted.append((table, page, future))
                pb.next()
                source_session.expunge_all()

        # Surface worker failures instead of dropping them silently. This runs
        # after every page has been queued so submission is not serialized.
        for table, page, future in submitted:
            error = future.exception()
            if error is not None:
                logger.error('page %d of table %s failed: %r', page, table, error)
Example #7
0
    def get_ids(self):
        """Return the set of ids, loading them from the ids file when one exists.

        When ``has_ids_file()`` is true the result of ``load_ids()`` is
        returned directly. Otherwise the type and start-date filters and the
        page size (100) are set, every result page is walked, the ``'id'`` of
        each entry is collected, and the set is passed to ``save_ids`` before
        being returned.

        Returns:
            The value of ``load_ids()`` or the freshly collected ``set`` of ids.
        """
        if self.has_ids_file():
            return self.load_ids()

        ids = set()
        self.set_siglas_tipo(['PEC', 'PLP', 'PL', 'PLV', 'PDC', 'MPV'])
        self.set_data_inicio('1984-01-01')
        self.set_itens(100)

        resp = self.busca_todas()
        total = self.get_total_paginas()
        pbar = ProgressBar(total - 1,
                           prefix='ID Proposições',
                           suffix='obtidos')

        # Consume the current page BEFORE asking whether another one follows.
        # Checking first (as the previous loop did) skipped the last fetched
        # page and returned nothing at all for single-page results.
        # NOTE(review): assumes has_next() reports whether a page follows the
        # most recently fetched one -- confirm.
        while True:
            ids.update(prep['id'] for prep in resp['dados'])
            pbar.next()
            if not self.has_next():
                break
            resp = self.next()

        self.save_ids(ids)
        return ids
Example #8
0
 def test_next_method(self, pb):
     """Check that each ``next()`` call invokes ``generate_pbar`` with the running step count.

     The ``pb`` argument is not used; a fresh ``ProgressBar`` is built instead.
     NOTE(review): ``generate_pbar`` is presumably patched by a decorator
     outside this snippet -- confirm.
     """
     bar = ProgressBar(self.total, fill='X')
     for expected_step in range(1, 51):
         bar.next()
         bar.generate_pbar.assert_called_with(bar, expected_step)