Beispiel #1
0
 def __get_conexao(self):
     """Open a cx_Oracle connection, retrying on database errors.

     Credentials are read from the CONEXAO_ORACLE configuration section
     while holding the class-wide lock.  On DatabaseError the method
     waits 20 seconds and retries (4 attempts total); returns None when
     every attempt fails.
     """
     con = None
     tries = 1
     while tries < 5 and con is None:
         RetrieveMetadata.__thread_lock.acquire()
         try:
             usuario = Configuration.get_val(section_name='CONEXAO_ORACLE',
                                             val_name='usuario')
             senha = Configuration.get_val(section_name='CONEXAO_ORACLE',
                                           val_name='senha')
             host = Configuration.get_val(section_name='CONEXAO_ORACLE',
                                          val_name='host')
             servico = Configuration.get_val(section_name='CONEXAO_ORACLE',
                                             val_name='servico')
             url = usuario + '/' + senha + '@' + host + '/' + servico
             con = cx_Oracle.connect(url)
         except DatabaseError as e:
             print('Erro estabelecendo conexao com o banco de dados: ' +
                   str(e))
         finally:
             # Always release the lock: the original code leaked it when
             # an exception other than DatabaseError was raised.
             RetrieveMetadata.__thread_lock.release()
         if con is None:
             time.sleep(20)  # back off before the next attempt
         tries += 1
     return con
Beispiel #2
0
def executa_recuperacao_ft_ss_filiado_mes(uf):
    """Extract FT_SS_FILIADO_MES data for *uf*, one worker thread per partition.

    Spawns up to ``qtd_threads`` EscritaArquivos threads at a time, joining
    each batch before starting the next, and reports progress after every
    completed batch.
    """
    thread_max = int(Configuration.get_val(section_name='CONFIG', val_name='qtd_threads'))
    bucket_size = int(Configuration.get_val(section_name='CONFIG', val_name='bucket_size'))
    localidade = Localidade()
    prt = Partition()
    locais_votacao = localidade.get_localidades(uf)
    particoes = prt.get_partitions(os.path.dirname(__file__))
    total = len(particoes)
    __update_progress(__calcula_progress(1, total))

    thread_list = []

    bucket_section = localidade.bucketing_locais(bucket_size=bucket_size, locais=locais_votacao)
    count_partitions = 0
    for particao in particoes:  # for every partition
        particao = particao.split()[0]

        nt = EscritaArquivos(particao, bucket_section, uf)
        thread_list.append(nt)
        nt.start()

        count_partitions += 1
        if len(thread_list) >= thread_max:
            for t in thread_list:
                t.join()
            thread_list = []
            __update_progress(__calcula_progress(count_partitions, total))

    if thread_list:
        for t in thread_list:
            t.join()
        # These partitions were already counted in the loop above; the
        # original incremented count_partitions again here, over-reporting
        # progress beyond 'total'.
        __update_progress(__calcula_progress(count_partitions, total))
Beispiel #3
0
def get_conexao():
    """Build and return an Oracle connection from the CONEXAO_ORACLE section."""
    section = 'CONEXAO_ORACLE'
    user = Configuration.get_val(section_name=section, val_name='usuario')
    password = Configuration.get_val(section_name=section, val_name='senha')
    db_host = Configuration.get_val(section_name=section, val_name='host')
    service = Configuration.get_val(section_name=section, val_name='servico')
    dsn = user + '/' + password + '@' + db_host + '/' + service
    return cx_Oracle.connect(dsn)
Beispiel #4
0
    def escreve_script(self, table_script):
        """Append *table_script* to the configured Hive creation script file.

        The file is created on first use and appended to afterwards; three
        newlines separate consecutive scripts.
        """
        nome_arquivo = Configuration.get_val(
            section_name='SCRIPT_CRIACAO_HIVE', val_name='nome_arquivo')
        caminho = Configuration.get_val(section_name='SCRIPT_CRIACAO_HIVE',
                                        val_name='caminho_destino')
        nome_arquivo = os.path.join(caminho, nome_arquivo)
        # Append when the file already exists, otherwise create it.
        append_write = 'a' if os.path.exists(nome_arquivo) else 'w'

        # 'with' guarantees the handle is closed even if write() raises;
        # the original leaked the handle on error.
        with open(nome_arquivo, append_write) as fhandle:
            fhandle.write(table_script)
            fhandle.write('\n' * 3)
Beispiel #5
0
    def escreve_dados_arquivo(self, conexao, partition, secao, uf):
        """Dump FT_SS_SITUACAO_ELEITOR rows of *partition*, restricted to the
        sections in *secao*, into a per-UF/partition '.arq' CSV file.

        Rows that fail to format are recorded in a '.log' side file, as is
        any DatabaseError raised by the query.
        """
        raiz_nome_arquivo = Configuration.get_val(
            section_name='ARQUIVO_SITUACAO_ELEITOR',
            val_name='raiz_nome_arquivo')
        caminho = Configuration.get_val(
            section_name='ARQUIVO_SITUACAO_ELEITOR',
            val_name='caminho_destino')
        nome_arquivo = '{0}/{1}_{2}_{3}.arq'.format(caminho, raiz_nome_arquivo,
                                                    uf, partition)
        nome_arquivo_log = '{0}/log/{1}_{2}_{3}.log'.format(
            caminho, raiz_nome_arquivo, uf, partition)
        # Append to an existing data file, create it otherwise.
        append_write = 'a' if os.path.exists(nome_arquivo) else 'w'

        in_vars = ','.join('%d' % i for i in secao)
        consulta = 'SELECT IDW_DATA, IDW_HIER_SECAO,IDW_GRP1_CARACT_ELEITOR,IDW_OCUPACAO,IDW_HIER_LOGRADOURO, \
                                   IDW_ELEITOR, IDW_ENDERECO_ELEITOR, NR_DAT_NASCIMENTO, NR_ANOMES, \
                                   CD_OBJETO_ELEITOR FROM ADMDMELEITOR.FT_SS_SITUACAO_ELEITOR PARTITION ({0}) \
                                   WHERE IDW_HIER_SECAO in ({1})'.format(
            partition, in_vars)
        # 'with' closes both handles even on unexpected errors; the original
        # leaked the data handle when opening the log file failed.
        with open(nome_arquivo, append_write) as fhandle, \
                open(nome_arquivo_log, 'w') as floghandle:
            try:
                cursor = conexao.cursor()
                cursor.execute(consulta)
                for (idwData, idwHier, idwGrp, idwOcu, idwLogra, idwEleitor,
                     idwEnd, datNasc, nrAnoMes, coObjEleitor) in cursor:
                    try:
                        linha = '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % (
                            str(idwData or ''), str(idwHier or ''),
                            str(idwGrp or ''), str(idwOcu or ''),
                            str(idwLogra or ''), str(idwEleitor or ''),
                            str(idwEnd or ''), str(datNasc or ''),
                            str(nrAnoMes or ''), coObjEleitor)
                        fhandle.write(linha)
                    except TypeError:
                        # Log the raw values of rows that cannot be formatted.
                        err_line = 'valores: {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}\n'.\
                            format(idwData, idwHier, idwGrp, idwOcu, idwLogra,
                                   idwEleitor, idwEnd, datNasc, nrAnoMes,
                                   coObjEleitor)
                        floghandle.write(err_line)
            except DatabaseError as e:
                msg = 'Erro ao executar consulta ao banco: {0}\n'.format(e)
                print(msg)
                floghandle.write(msg)
    def executa_baixa(self, secoes):
        """Download FT_EV_JUSTIFICATIVA data for every section bucket.

        Recycles the database connection after ``qtd_iteracoes`` buckets and
        stops early when a replacement connection cannot be obtained.
        """
        last_value = Geral.get_last_value('FT_EV_JUSTIFICATIVA',
                                          'IDW_DATA_JUSTIFICATIVA',
                                          'eleitor_analyst')
        con = Geral.get_conexao()
        qtd_iteracoes = int(
            Configuration.get_val(section_name='CONEXAO_ORACLE',
                                  val_name='qtd_iteracoes'))
        usos = 1
        for secao in secoes:  # every section bucket
            self.__escreve_dados_arquivo(con, secao, last_value[0][0])
            # Maximum number of iterations on the same connection reached:
            # close it and open a fresh one.
            if usos >= qtd_iteracoes:
                con.close()
                con = Geral.get_conexao()
                if con is None:
                    print(
                        'Nao foi possivel recuperar conexao com o banco de dados: secao {0}'
                        .format(secao))
                    break
                usos = 0
            usos += 1

        if con is not None:
            con.close()
Beispiel #7
0
def executa_recuperacao_dm_registro_rae():
    """Extract DM_REGISTRO_RAE, one EscritaArquivos thread per partition.

    At most ``qtd_threads`` threads run at a time; progress is reported
    after each completed batch of threads.
    """
    thread_max = int(
        Configuration.get_val(section_name='CONFIG', val_name='qtd_threads'))
    prt = Partition()
    particoes = prt.get_partitions(os.path.dirname(__file__))
    total = len(particoes)
    update_progress(calcula_progress(1, total))

    thread_list = []

    count_partitions = 0
    for particao in particoes:  # for every partition
        particao = particao.split()[0]

        nt = EscritaArquivos(particao)
        thread_list.append(nt)
        nt.start()

        count_partitions += 1
        if len(thread_list) >= thread_max:
            for t in thread_list:
                t.join()
            thread_list = []
            update_progress(calcula_progress(count_partitions, total))

    if thread_list:
        for t in thread_list:
            t.join()
        # These partitions were already counted in the loop above; the
        # original incremented count_partitions again here, over-reporting
        # progress beyond 'total'.
        update_progress(calcula_progress(count_partitions, total))
Beispiel #8
0
 def __list_files(ini_section_name):
     """Return the '*.arq' file names in the section's destination directory.

     :param ini_section_name: ini section holding ``caminho_destino``
     :return: list of matching file names; empty when the path is unset.
     """
     mypath = Configuration.get_val(section_name=ini_section_name,
                                    val_name='caminho_destino')
     if not mypath:
         return []
     # fnmatch.filter already returns a list; the comprehension was redundant.
     return fnmatch.filter(listdir(mypath), '*.arq')
    def __escreve_dados_arquivo(self, conexao, secao, idw_data):
        """Append FT_EV_JUSTIFICATIVA rows newer than *idw_data*, for the
        sections in *secao*, to the configured '.arq' CSV file.

        Rows that cannot be formatted, and any DatabaseError, are written
        to the companion '.log' file.
        """
        raiz_nome_arquivo = Configuration.get_val(
            section_name='ARQUIVO_FT_EV_JUSTIFICATIVA',
            val_name='raiz_nome_arquivo')
        caminho = Configuration.get_val(
            section_name='ARQUIVO_FT_EV_JUSTIFICATIVA',
            val_name='caminho_destino')
        nome_arquivo = '{0}/{1}.arq'.format(caminho, raiz_nome_arquivo)
        nome_arquivo_log = '{0}/log/{1}.log'.format(caminho, raiz_nome_arquivo)
        # Append to existing files, create them otherwise.
        modo_dados = 'a' if os.path.exists(nome_arquivo) else 'w'
        modo_log = 'a' if os.path.exists(nome_arquivo_log) else 'w'

        in_vars = ','.join('%d' % i for i in secao)
        consulta = 'select * from admdmeleitor.FT_EV_JUSTIFICATIVA just \
                        where JUST.IDW_DATA_JUSTIFICATIVA > {0}  \
                        and just.DM_HIER_SECAO in ({1})'.format(
            idw_data, in_vars)
        # 'with' closes both handles even on unexpected errors; the original
        # leaked the data handle when opening the log file failed.
        with open(nome_arquivo, modo_dados) as fhandle, \
                open(nome_arquivo_log, modo_log) as floghandle:
            try:
                cursor = conexao.cursor()
                cursor.execute(consulta)
                # 'registro' avoids shadowing the builtin 'tuple'.
                for registro in cursor:
                    try:
                        linha = ','.join('%s' % (str(i or '')) for i in registro)
                        fhandle.write(linha + '\n')
                    except TypeError:
                        err_line = ','.join('%s' % (str(i or '')) for i in registro)
                        floghandle.write(err_line + '\n')
            except DatabaseError as e:
                # Fixed: the original passed %-style placeholders to
                # str.format (a no-op) and read attributes off the datetime
                # class itself, yielding a useless timestamp.  Assumes
                # 'datetime' is the class (from datetime import datetime) --
                # TODO confirm against the module's imports.
                now = datetime.now()
                momento = '%d%02d%02d-%02d%02d%02d' % (
                    now.year, now.month, now.day, now.hour, now.minute,
                    now.second)
                msg = '{0} - Erro ao executar consulta ao banco: {1}\n'.format(
                    momento, e)
                floghandle.write(msg)
Beispiel #10
0
    def __escreve_dados_arquivo(self, conexao, partition, secao, uf):
        """Dump FT_SS_FILIADO_MES rows of *partition*, restricted to the
        localities in *secao*, into a per-UF/partition '.arq' CSV file.

        Rows that fail to format, and any DatabaseError, are written to the
        '.log' companion file.
        """
        raiz_nome_arquivo = Configuration.get_val(section_name='ARQUIVO_FILIADO_MES',
                                                  val_name='raiz_nome_arquivo')
        caminho = Configuration.get_val(section_name='ARQUIVO_FILIADO_MES', val_name='caminho_destino')
        nome_arquivo = '{0}/{1}_{2}_{3}.arq'.format(caminho, raiz_nome_arquivo, uf, partition)
        nome_arquivo_log = '{0}/log/{1}_{2}_{3}.log'.format(caminho, raiz_nome_arquivo, uf, partition)
        # Append to an existing data file, create it otherwise.
        append_write = 'a' if os.path.exists(nome_arquivo) else 'w'

        in_vars = ','.join('%d' % i for i in secao)
        consulta = 'SELECT IDW_MES, \
                       IDW_FILIADO, \
                       IDW_HIER_LOCALIDADE, \
                       IDW_HIER_ZONA, \
                       IDW_PARTIDO, \
                       IDW_LISTA_FILIACAO, \
                       NR_ANO_MES \
                       FROM ADMDMELEITOR.FT_SS_FILIADO_MES PARTITION ({0}) \
                       WHERE IDW_HIER_LOCALIDADE in ({1})'.format(partition, in_vars)
        # 'with' closes both handles even on unexpected errors; the original
        # leaked the data handle when opening the log file failed.
        with open(nome_arquivo, append_write) as fhandle, \
                open(nome_arquivo_log, 'w') as floghandle:
            try:
                cursor = conexao.cursor()
                cursor.execute(consulta)
                for idw_mes, idw_filiado, idw_h_loc, idw_h_zona, idw_partido, idw_ls_filiacao, nr_anomes in cursor:
                    try:
                        linha = '%s,%s,%s,%s,%s,%s,%s\n' % (str(idw_mes or ''), str(idw_filiado or ''),
                                                            str(idw_h_loc or ''), str(idw_h_zona or ''),
                                                            str(idw_partido or ''), str(idw_ls_filiacao or ''),
                                                            str(nr_anomes or ''))
                        fhandle.write(linha)
                    except TypeError:
                        # Log the values of rows that cannot be formatted.
                        err_line = 'valores: {0}, {1}, {2}, {3}, {4}, {5}, {6}\n'.\
                            format(str(idw_mes or ''), str(idw_filiado or ''),
                                   str(idw_h_loc or ''), str(idw_h_zona or ''),
                                   str(idw_partido or ''), str(idw_ls_filiacao or ''),
                                   str(nr_anomes or ''))
                        floghandle.write(err_line)
            except DatabaseError as e:
                msg = 'Erro ao executar consulta ao banco: {0}\n'.format(e)
                print(msg)
                floghandle.write(msg)
def roda():
    """Bucket the sections and launch the FT_EV_JUSTIFICATIVA download."""
    secoes = __get_secoes()
    sc = Secao()
    tamanho_bucket = int(
        Configuration.get_val(section_name='CONFIG', val_name='bucket_size'))
    buckets = sc.bucketing_sections(bucketSize=tamanho_bucket, secoes=secoes)
    AtualizaFtEvJustificativa().executa_baixa(buckets)
Beispiel #12
0
    def get_zonas(self, uf):
        """Return the IDW_HIER_SECAO identifiers for the given idw_uf.

        Opens a dedicated Oracle connection built from the CONEXAO_ORACLE
        configuration section and closes it even when the query fails.
        """
        usuario = Configuration.get_val(section_name='CONEXAO_ORACLE', val_name='usuario')
        senha = Configuration.get_val(section_name='CONEXAO_ORACLE', val_name='senha')
        host = Configuration.get_val(section_name='CONEXAO_ORACLE', val_name='host')
        servico = Configuration.get_val(section_name='CONEXAO_ORACLE', val_name='servico')
        url = usuario + '/' + senha + '@' + host + '/' + servico
        con = cx_Oracle.connect(url)
        try:
            cursor = con.cursor()
            ## idw_uf DF - 3; idw_uf ZZ 26
            cursor.execute('select IDW_HIER_SECAO from ADMDMELEITOR.DM_HIER_SECAO \
                        where idw_uf = :idUf order by IDW_HIER_SECAO', idUf=uf)
            # Materialize the cursor rows (same tuples the append loop built).
            return list(cursor)
        finally:
            # Fixed: the original leaked the connection when the query raised.
            con.close()
    def escreve_dados_arquivo(self, conexao, idw_data, config, consulta):
        """Run *consulta* (formatted with *idw_data*) and append the rows to
        the '.arq' file configured in section *config*.

        Commas inside values are stripped so each row stays one CSV record.
        Rows that fail to format, and any DatabaseError, go to the '.log'
        companion file.
        """
        raiz_nome_arquivo = Configuration.get_val(section_name=config,
                                                  val_name='raiz_nome_arquivo')
        caminho = Configuration.get_val(section_name=config,
                                        val_name='caminho_destino')
        nome_arquivo = '{0}/{1}.arq'.format(caminho, raiz_nome_arquivo)
        nome_arquivo_log = '{0}/log/{1}.log'.format(caminho, raiz_nome_arquivo)
        # Append to existing files, create them otherwise.
        modo_dados = 'a' if os.path.exists(nome_arquivo) else 'w'
        modo_log = 'a' if os.path.exists(nome_arquivo_log) else 'w'

        # 'with' closes both handles even on unexpected errors; the original
        # leaked the data handle when opening the log file failed.
        with open(nome_arquivo, modo_dados) as fhandle, \
                open(nome_arquivo_log, modo_log) as floghandle:
            try:
                cursor = conexao.cursor()
                cursor.execute(consulta.format(idw_data))
                for tupla in cursor:
                    try:
                        linha = ','.join('%s' % (str(i or '')).replace(',', '')
                                         for i in tupla)
                        fhandle.write(linha + '\n')
                    except TypeError:
                        err_line = ','.join('%s' % (str(i or '')) for i in tupla)
                        floghandle.write(err_line + '\n')
            except DatabaseError as e:
                # Fixed: the original passed %-style placeholders to
                # str.format (a no-op) and read attributes off the datetime
                # class itself, yielding a useless timestamp.  Assumes
                # 'datetime' is the class (from datetime import datetime) --
                # TODO confirm against the module's imports.
                now = datetime.now()
                momento = '%d%02d%02d-%02d%02d%02d' % (
                    now.year, now.month, now.day, now.hour, now.minute,
                    now.second)
                msg = '{0} - Erro ao executar consulta ao banco: {1}\n'.format(
                    momento, e)
                floghandle.write(msg)
Beispiel #14
0
    def __escreve_dados_arquivo(self, conexao, partition, secao, uf):
        """Dump FT_EV_ABSTENCAO rows of *partition*, restricted to the
        localities in *secao*, into a per-UF/partition '.arq' CSV file.

        Rows that fail to format, and any DatabaseError, are written to the
        '.log' companion file.
        """
        raiz_nome_arquivo = Configuration.get_val(
            section_name='ARQUIVO_ABSTENCAO', val_name='raiz_nome_arquivo')
        caminho = Configuration.get_val(section_name='ARQUIVO_ABSTENCAO',
                                        val_name='caminho_destino')
        nome_arquivo = '{0}/{1}_{2}_{3}.arq'.format(caminho, raiz_nome_arquivo,
                                                    uf, partition)
        nome_arquivo_log = '{0}/log/{1}_{2}_{3}.log'.format(
            caminho, raiz_nome_arquivo, uf, partition)
        # Append to an existing data file, create it otherwise.
        append_write = 'a' if os.path.exists(nome_arquivo) else 'w'

        in_vars = ','.join('%d' % i for i in secao)
        consulta = 'SELECT * \
                       FROM ADMDMELEITOR.FT_EV_ABSTENCAO PARTITION ({0}) \
                       WHERE IDW_HIER_LOCALIDADE in ({1})'.format(
            partition, in_vars)
        # 'with' closes both handles even on unexpected errors; the original
        # leaked the data handle when opening the log file failed.
        with open(nome_arquivo, append_write) as fhandle, \
                open(nome_arquivo_log, 'w') as floghandle:
            try:
                cursor = conexao.cursor()
                cursor.execute(consulta)
                # 'registro' avoids shadowing the builtin 'tuple'.
                for registro in cursor:
                    try:
                        linha = ','.join('%s' % (str(i or '')) for i in registro)
                        fhandle.write(linha + '\n')
                    except TypeError:
                        err_line = ','.join('%s' % (str(i or '')) for i in registro)
                        floghandle.write(err_line + '\n')
            except DatabaseError as e:
                msg = 'Erro ao executar consulta ao banco: {0}\n'.format(e)
                print(msg)
                floghandle.write(msg)
Beispiel #15
0
def executa_recuperacao_ft_ss_situacao_eleitor(uf):
    """Extract FT_SS_SITUACAO_ELEITOR data for *uf*, one thread per partition.

    Spawns up to ``qtd_threads`` EscritaArquivos threads at a time, joining
    each batch before starting the next, and reports progress after every
    completed batch.
    """
    thread_max = int(
        Configuration.get_val(section_name='CONFIG', val_name='qtd_threads'))
    bucket_size = int(
        Configuration.get_val(section_name='CONFIG', val_name='bucket_size'))
    sc = Secao()
    prt = Partition()
    secoes = sc.get_zonas(uf)
    particoes = prt.get_partitions(os.path.dirname(__file__))
    total = len(particoes)
    update_progress(calcula_progress(1, total))

    thread_list = []

    bucket_section = sc.bucketing_sections(bucketSize=bucket_size,
                                           secoes=secoes)
    count_partitions = 0
    for particao in particoes:  # for every partition

        particao = particao.split()[0]

        nt = EscritaArquivos(particao, bucket_section, uf)
        thread_list.append(nt)
        nt.start()

        count_partitions += 1
        if len(thread_list) >= thread_max:
            for t in thread_list:
                t.join()
            thread_list = []
            update_progress(calcula_progress(count_partitions, total))

    if thread_list:
        for t in thread_list:
            t.join()
        # These partitions were already counted in the loop above; the
        # original incremented count_partitions again here, over-reporting
        # progress beyond 'total'.
        update_progress(calcula_progress(count_partitions, total))
Beispiel #16
0
    def escreve_dados_arquivo(self, conexao, partition):
        """Dump all DM_REGISTRO_RAE rows of *partition* to a '.arq' CSV file.

        Rows that fail to format, and any DatabaseError, are recorded in the
        '.log' companion file.
        """
        raiz_nome_arquivo = Configuration.get_val(
            section_name='ARQUIVO_REGISTRO_RAE', val_name='raiz_nome_arquivo')
        caminho = Configuration.get_val(section_name='ARQUIVO_REGISTRO_RAE',
                                        val_name='caminho_destino')
        nome_arquivo = '{0}/{1}_{2}.arq'.format(caminho, raiz_nome_arquivo,
                                                partition)
        nome_arquivo_log = '{0}/log/{1}_{2}.log'.format(
            caminho, raiz_nome_arquivo, partition)
        # Append to an existing data file, create it otherwise.
        append_write = 'a' if os.path.exists(nome_arquivo) else 'w'

        # 'with' closes both handles even on unexpected errors; the original
        # leaked the data handle when opening the log file failed.
        with open(nome_arquivo, append_write) as fhandle, \
                open(nome_arquivo_log, 'w') as floghandle:
            try:
                cursor = conexao.cursor()
                consulta = 'SELECT * FROM ADMDMELEITOR.DM_REGISTRO_RAE PARTITION ({0})'.format(
                    partition)
                cursor.execute(consulta)
                # 'registro' avoids shadowing the builtin 'tuple'.
                for registro in cursor:
                    try:
                        linha = ','.join('%s' % str(i or '') for i in registro)
                        fhandle.write(linha + '\n')
                    except TypeError:
                        # Fixed: file.write() requires a string; the original
                        # passed the row tuple itself, raising TypeError.
                        floghandle.write(str(registro))
                        floghandle.write('\n')
            except DatabaseError as e:
                msg = 'Erro ao executar consulta ao banco: {0}\n'.format(e)
                print(msg)
                floghandle.write(msg)
Beispiel #17
0
    def run(self):
        """Thread entry point: dump every local of this partition to file.

        The Oracle connection is recycled after ``qtd_iteracoes`` uses; the
        thread stops early when a replacement connection cannot be obtained.
        """
        ttl_con = 1
        con = self.__get_conexao()
        qtd_iteracoes = int(Configuration.get_val(section_name='CONEXAO_ORACLE', val_name='qtd_iteracoes'))
        for local in self.locais:  # for every local
            self.__escreve_dados_arquivo(con, self.partitionExecution, local, self.uf)
            # Maximum number of iterations on the same connection reached.
            if ttl_con >= qtd_iteracoes:
                con.close()
                con = self.__get_conexao()
                if con is None:
                    print('Nao foi possivel recuperar conexao com o banco de dados: particao {0}, secao {1}'.
                          format(self.partitionExecution, local))
                    break
                ttl_con = 0

            ttl_con += 1

        # Fixed: close the final connection; the original leaked it (the
        # sibling executa_baixa already closes it this way).
        if con is not None:
            con.close()
Beispiel #18
0
    def get_last_value(table_name, column_name, role=None):
        """Return the rows of ``SELECT MAX(column_name) FROM table_name`` via Hive.

        Connects with LDAP authentication using the CONEXAO_HIVE section.

        :param role: optional Hive role to set before querying.
        :return: list of fetched row(s) containing the MAX aggregate.
        """
        hive_server = Configuration.get_val(section_name='CONEXAO_HIVE', val_name='host')
        hive_port = int(Configuration.get_val(section_name='CONEXAO_HIVE', val_name='port'))
        hive_user = Configuration.get_val(section_name='CONEXAO_HIVE', val_name='usuario')
        hive_pass = Configuration.get_val(section_name='CONEXAO_HIVE', val_name='senha')
        ldap_domain = Configuration.get_val(section_name='CONEXAO_HIVE', val_name='domain')
        hive_database = Configuration.get_val(section_name='CONEXAO_HIVE', val_name='database')

        conn = hive.connect(host=hive_server,
                            port=hive_port,
                            authMechanism='LDAP',
                            user=hive_user + '@' + ldap_domain,
                            password=hive_pass,
                            database=hive_database)
        try:
            cursor = conn.cursor()
            if role is not None:  # idiomatic form of 'not role is None'
                cursor.execute('set role ' + role)
            cursor.execute('select MAX({0}) AS MAX_ID from {1}'.format(column_name, table_name))
            ret_values = list(cursor.fetch())
        finally:
            # Fixed: the original leaked the connection when a query raised.
            conn.close()

        return ret_values
Beispiel #19
0
    def importa_dados_hdfs(self, opt, ini_section_name):
        '''
        Import the data from ELEITOR database into HDFS.

        :param opt: 1 - Import: remove and recreate the HDFS directory, then
                    ``hdfs dfs -put`` each file; 2 - Update: create the
                    directory only when missing, then ``hdfs dfs -appendToFile``
        :param ini_section_name: section of the ini file that defines
                                 ``arquivo_hadoop`` and ``caminho_destino``
        :return
        '''
        # Local '*.arq' files produced for this ini section.
        files = ImportDadosHdfs.__list_files(ini_section_name)
        for f in files:
            print(f)

        hadoop_file = Configuration.get_val(section_name=ini_section_name,
                                            val_name='arquivo_hadoop')
        if not hadoop_file:
            print(
                'It was not possible to find hadoop file path on ini file according to the section {0}'
                .format(ini_section_name))
            exit(1)

        path_to_files = Configuration.get_val(section_name=ini_section_name,
                                              val_name='caminho_destino')

        print('Checking hadoop file {0}'.format(hadoop_file))

        # 'hdfs dfs -test -e' exits 0 when the target already exists.
        (ret, out, err) = ImportDadosHdfs.__run_cmd(
            ['hdfs', 'dfs', '-test', '-e', hadoop_file])

        if opt == 1:
            # Full import: wipe any existing directory before recreating it.
            if ret == 0:
                print(
                    'Haddoop file already exists. Removing existing hadoop file {0}'
                    .format(hadoop_file))
                (ret, out, err) = ImportDadosHdfs.__run_cmd(
                    ['hdfs', 'dfs', '-rm', '-r', '-skipTrash', hadoop_file])
            elif err:
                print('Error testing hadoop file {0} existence'.format(
                    hadoop_file))
                print('Error: ' + str(err))
                return

            print('Creating a new directory on hadoop')
            (ret, out, err) = ImportDadosHdfs.__run_cmd(
                ['hdfs', 'dfs', '-mkdir', hadoop_file])
            if ret != 0:
                print('Error creating new file {0}'.format(hadoop_file))
                print('Error message: ' + str(err))
        elif opt == 2:
            # Update: create the directory only when it does not exist yet.
            if ret != 0:
                print('Creating a new directory on hadoop')
                (ret, out, err) = ImportDadosHdfs.__run_cmd(
                    ['hdfs', 'dfs', '-mkdir', hadoop_file])
                if ret != 0:
                    print('Error creating new file {0}'.format(hadoop_file))
                    print('Error message: ' + str(err))

        qtd = 0
        for f in files:
            qtd += 1
            full_name = join(path_to_files, f)
            path_name_ok = join(path_to_files, 'ok')
            path_name_error = join(path_to_files, 'error')
            # The last '_'-separated token of the file name selects the HDFS
            # part file the data goes into.
            lst_number = f.split('_')
            lst_number = lst_number[len(lst_number) - 1]
            part_name = join(hadoop_file, lst_number)
            print('Importing file: {0} to {1} - {2}/{3}'.format(
                full_name, part_name, qtd, len(files)))
            if opt == 1:
                (ret, out, err) = ImportDadosHdfs.__run_cmd(
                    ['hdfs', 'dfs', '-put', full_name, part_name])
            elif opt == 2:
                (ret, out, err) = ImportDadosHdfs.__run_cmd(
                    ['hdfs', 'dfs', '-appendToFile', full_name, part_name])
            if ret == 0:
                now = datetime.datetime.now()
                date_tag = '%d%02d%02d-%02d%02d' % (
                    now.year, now.month, now.day, now.hour, now.minute)
                print('Import done successfully')
                print('Moving file to ok directory')
                # Archive the imported file: move it to ok/, tar.gz it with a
                # timestamp suffix, then delete the uncompressed copy.
                full_name_ok = join(path_name_ok, f)
                (ret, out, err) = ImportDadosHdfs.__run_cmd(
                    ['mv', full_name, full_name_ok])
                (ret, out, err) = ImportDadosHdfs.__run_cmd([
                    'tar', '-czf', full_name_ok + '_' + date_tag + '.tar.gz',
                    full_name_ok
                ])
                (ret, out,
                 err) = ImportDadosHdfs.__run_cmd(['rm', '-f', full_name_ok])
            else:
                print('Error appending to hadoop new file: ' + str(err))
                # Failed import: move the file to error/ and append the
                # command output to a side log.
                full_name_error = join(path_name_error, f)
                full_name_error_log = join(path_name_error, f) + '.log'
                (ret, out, err) = ImportDadosHdfs.__run_cmd(
                    ['mv', full_name, full_name_error])
                append_write = 'w'
                if os.path.exists(full_name_error_log):
                    append_write = 'a'

                fhandle = open(full_name_error_log, append_write)
                fhandle.write(out)
                fhandle.close()

                # NOTE(review): progress is reported only on this error
                # branch -- confirm whether it should also run after a
                # successful import.
                ImportDadosHdfs.__update_progress(
                    ImportDadosHdfs.__get_percent_completado(qtd, len(files)))
Beispiel #20
0
    def importa_dados_hdfs(self):
        '''
        Import the local ARQUIVO_SITUACAO_ELEITOR '.arq' files into HDFS.

        Removes and recreates the configured HDFS directory, then
        ``hdfs dfs -put``s each file, archiving successes to ok/ and
        failures to error/ (with a side log of the command output).
        '''
        files = self.__list_files()
        for f in files:
            print(f)

        hadoop_file = Configuration.get_val(
            section_name='ARQUIVO_SITUACAO_ELEITOR', val_name='arquivo_hadoop')
        path_to_files = Configuration.get_val(
            section_name='ARQUIVO_SITUACAO_ELEITOR',
            val_name='caminho_destino')

        print('Checking hadoop file {0}'.format(hadoop_file))

        # 'hdfs dfs -test -e' exits 0 when the target already exists.
        (ret, out,
         err) = self.__run_cmd(['hdfs', 'dfs', '-test', '-e', hadoop_file])

        if ret == 0:
            # Wipe any existing directory before recreating it.
            print(
                'Haddoop file already exists. Removing existing hadoop file {0}'
                .format(hadoop_file))
            (ret, out, err) = self.__run_cmd(
                ['hdfs', 'dfs', '-rm', '-r', '-skipTrash', hadoop_file])
        elif err:
            print(
                'Error testing hadoop file {0} existence'.format(hadoop_file))
            print('Error: ' + str(err))
            return

        print('Creating a new directory on hadoop')
        (ret, out,
         err) = self.__run_cmd(['hdfs', 'dfs', '-mkdir', hadoop_file])
        if ret != 0:
            print('Error creating new file {0}'.format(hadoop_file))
            print('Error message: ' + str(err))

        qtd = 0
        for f in files:
            qtd += 1
            full_name = join(path_to_files, f)
            path_name_ok = join(path_to_files, 'ok')
            path_name_error = join(path_to_files, 'error')
            # The last '_'-separated token of the file name selects the HDFS
            # part file the data goes into.
            lst_number = f.split('_')
            lst_number = lst_number[len(lst_number) - 1]
            part_name = join(hadoop_file, lst_number)
            print('Importing file: {0} to {1}'.format(full_name, part_name))
            (ret, out, err) = self.__run_cmd(
                ['hdfs', 'dfs', '-put', full_name, part_name])
            if ret == 0:
                print('Import done successfully')
                print('Moving file to ok directory')
                # Archive the imported file: move it to ok/, tar.gz it, then
                # delete the uncompressed copy.
                full_name_ok = join(path_name_ok, f)
                (ret, out,
                 err) = self.__run_cmd(['mv', full_name, full_name_ok])
                (ret, out, err) = self.__run_cmd(
                    ['tar', '-czf', full_name_ok + '.tar.gz', full_name_ok])
                (ret, out, err) = self.__run_cmd(['rm', '-f', full_name_ok])
            else:
                print('Error appending to hadoop new file: ' + str(err))
                # Failed import: move the file to error/ and append the
                # command output to a side log.
                full_name_error = join(path_name_error, f)
                full_name_error_log = join(path_name_error, f) + '.log'
                (ret, out,
                 err) = self.__run_cmd(['mv', full_name, full_name_error])
                append_write = 'w'
                if os.path.exists(full_name_error_log):
                    append_write = 'a'

                fhandle = open(full_name_error_log, append_write)
                fhandle.write(out)
                fhandle.close()

                # NOTE(review): progress is reported only on this error
                # branch -- confirm whether it should also run after a
                # successful import.
                self.__update_progress(
                    self.__get_percent_completado(qtd, len(files)))
Beispiel #21
0
 def __list_files(self):
     """Return the '*.arq' file names in the eleitor destination directory.

     Returns an empty list when the path is not configured (matches the
     guard in the parameterized __list_files elsewhere in this project).
     """
     mypath = Configuration.get_val(section_name='ARQUIVO_SITUACAO_ELEITOR',
                                    val_name='caminho_destino')
     if not mypath:
         return []
     # fnmatch.filter already returns a list; the comprehension was redundant.
     return fnmatch.filter(listdir(mypath), '*.arq')