Example #1
import logging

import pandas as pd


def adjust_type(df, tb_name):
    """Cast each column of df to the type declared in the table metadata."""
    logger = logging.getLogger('adjust type')
    # dataset.consult and replace_nan are project helpers defined elsewhere.
    for meta in dataset.consult(tb_name)['fields'].values():
        nome_banco = meta['nome_banco']      # column name in the database
        tipo = meta['tipo']                  # declared type: 'date', 'int' or 'float'
        date_format = meta['date_format']
        replace_null = meta['replace_null']  # fallback value for missing cells
        if tipo == 'date':
            try:
                df[nome_banco] = df[nome_banco].apply(
                    lambda x: pd.to_datetime(replace_nan(x, replace_null),
                                             format=date_format,
                                             errors='coerce'))
                logger.info(f'type date success in {nome_banco}')
            except ValueError as ve:
                logger.error(f'{ve} type date not recognized in {nome_banco}')
                raise
        elif tipo == 'int':
            try:
                df[nome_banco] = df[nome_banco].apply(
                    lambda x: replace_nan(x, replace_null)).astype(int)
                logger.info(f'type int success in {nome_banco}')
            except ValueError as ve:
                logger.error(f'{ve} type int not recognized in {nome_banco}')
                raise
        elif tipo == 'float':
            try:
                df[nome_banco] = df[nome_banco].apply(
                    lambda x: replace_nan(x, replace_null)).astype(float)
                logger.info(f'type float success in {nome_banco}')
            except ValueError as ve:
                logger.error(f'{ve} type float not recognized in {nome_banco}')
                raise
    return df
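The function relies on two project helpers that are not shown on this page: dataset.consult, which returns the field metadata for a table, and replace_nan, which swaps missing values for the configured fallback. A minimal sketch of what replace_nan is assumed to do, inferred only from the call sites above:

import pandas as pd

def replace_nan(value, replace_null):
    # Assumed behavior: return the configured fallback when the cell is
    # missing (None/NaN/empty string), otherwise pass the value through.
    if value is None or value == '' or pd.isna(value):
        return replace_null
    return value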
Example #2
def validator(df, tb_name):
    """Validate every column of df against the table metadata; return (ok, details)."""
    l_validator = []
    l_val = []
    for meta in dataset.consult(tb_name)['fields'].values():
        nome_banco = meta['nome_banco']
        nullable = meta['nullable']
        replace_null = meta['replace_null']
        tipo = meta['tipo']
        date_format = meta['date_format']
        regex = meta['regex']

        try:
            # Skip entries whose metadata name is purely numeric;
            # only real column names are validated.
            int(nome_banco)
        except ValueError:
            try:
                if (valnull(nullable, replace_null, df[nome_banco])
                        and valtype(tipo, date_format, replace_null,
                                    df[nome_banco])
                        and valregex(regex, nullable, replace_null,
                                     df[nome_banco])):
                    l_validator.append(True)
                    l_val.append({nome_banco: True})
                else:
                    l_validator.append(False)
                    l_val.append({nome_banco: False})
            except Exception:
                # Any failure inside the validators marks the column as invalid.
                l_validator.append(False)
                l_val.append({nome_banco: False})
    # The table is valid only if every column passed.
    return all(l_validator), l_val
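A possible call site, assuming a DataFrame has already been loaded for the table; the table name and error handling below are illustrative, not taken from the original project:

ok, details = validator(df, 'bt_clientes')  # hypothetical table name
if not ok:
    failed = [name for item in details for name, passed in item.items() if not passed]
    raise ValueError(f'validation failed for columns: {failed}')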
Example #3
    def transient2raw(self):
        # Column names declared in the table metadata, in order.
        tb_cols = [
            col['nome_banco']
            for col in dataset.consult(self.table_nm)['fields'].values()
        ]
        self.logger.info(f'List of columns dictionary {tb_cols}')
        # self.df appears to carry integer (positional) column labels, so keep
        # only the positions covered by the metadata and map them to names.
        lcol = [x for x in self.df.columns if x < len(tb_cols)]
        dict_cols = dict(zip(lcol, tb_cols))
        self.logger.info(f'Create Dictionary Cols {self.table_nm}')

        self.df_out.append(self.df.rename(columns=dict_cols))
        self.logger.info(f'Rename Cols {self.table_nm}')
        return self.df_out
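The rename only works because the transient file is read without a header, so self.df.columns holds integer positions. A self-contained illustration with made-up column names and data:

import pandas as pd

# Frame read without a header: the columns are the integers 0, 1, 2.
df = pd.DataFrame([[1, 'ana', '01-2020']], columns=[0, 1, 2])
tb_cols = ['id_cliente', 'nome', 'mes_competencia']  # hypothetical metadata names
dict_cols = dict(zip([c for c in df.columns if c < len(tb_cols)], tb_cols))
print(df.rename(columns=dict_cols).columns.tolist())
# ['id_cliente', 'nome', 'mes_competencia']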
Example #4
    def execute(self, context):
        # Files already landed in the data lake.
        ldatalake = [file['name'] for file in self.client.list_blobs()]
        self.logger.info('Create Datalake List')

        # Files available locally, skipping control and metadata files.
        llocal = [
            os.path.join(currentpath, file).replace(self.source, '')[1:]
            for currentpath, folders, files in os.walk(self.source)
            for file in files if file != 'diff.txt' and 'meta' not in file
        ]
        self.logger.info(f'Create Local List {llocal}')

        # Incremental tables: only files not yet present in the lake.
        ldiffinc = [
            file for file in llocal
            if dataset.consult(file)['typerun'] == 'increment'
            and file not in ldatalake
        ]
        # Full-load tables: always reprocessed.
        ldifffull = [
            file for file in llocal
            if dataset.consult(file)['typerun'] == 'full'
        ]
        ldiff = ldiffinc + ldifffull
        self.logger.info(f'Generate Diff List {ldiff}')
        return util.chunklist(ldiff, self.num_workers)
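util.chunklist is not shown on this page; judging from the call, it presumably splits the diff list into one slice per worker. A sketch of that assumed behavior (the real helper may differ):

def chunklist(lst, num_chunks):
    # Assumed behavior: split lst into at most num_chunks roughly equal slices.
    if not lst:
        return []
    size = -(-len(lst) // num_chunks)  # ceiling division
    return [lst[i:i + size] for i in range(0, len(lst), size)]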
Example #5
    def _comparedb(self, ldiff):
        lvalidate = []
        dictmax = {}  # cache of max(mes_competencia) per table
        for file in set(ldiff):
            tbname = f"bt_{dataset.tbname(file)}"
            tprun = dataset.consult(file)['typerun']
            if tprun == 'increment':
                try:
                    maxval = dictmax[tbname]
                except KeyError:
                    # First time this table is seen: query its current maximum.
                    conn = self.engine.connect()
                    selectmax = f'select max(mes_competencia) as mes_competencia from dictas.{tbname}'
                    maxval = conn.execute(selectmax).fetchall()[0][0]
                    dictmax.update({tbname: maxval})
                # The competence month is encoded in the file path as mmYYYY.
                filedate = datetime.strptime(
                    re.search(r"(\d+)",
                              file.split('/')[1].replace('-', '')).group(),
                    '%m%Y').date()
                if filedate > maxval:
                    lvalidate.append(file)
            else:
                # Full-load tables are always (re)processed.
                lvalidate.append(file)
        self.logger.info(f'Diff validate {lvalidate}')
        return lvalidate
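The competence month is pulled out of the second path segment. For an illustrative path (made up here), the extraction resolves as follows:

import re
from datetime import datetime

file = 'clientes/01-2020/clientes.csv'  # hypothetical lake path
raw = re.search(r"(\d+)", file.split('/')[1].replace('-', '')).group()
print(raw)                                    # '012020'
print(datetime.strptime(raw, '%m%Y').date())  # 2020-01-01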
Example #6
def check_exists(nmfile):
    # Full-load tables overwrite the target table; incremental tables append.
    if dataset.consult(nmfile)['typerun'] == 'full':
        return 'replace'
    else:
        return 'append'
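The returned strings match the values accepted by pandas' DataFrame.to_sql(if_exists=...), which is presumably where they end up; a hypothetical call site (df, engine and nmfile assumed to exist, schema name taken from the query in Example #5):

df.to_sql(f"bt_{dataset.tbname(nmfile)}", engine,
          schema='dictas',
          if_exists=check_exists(nmfile),  # 'replace' for full loads, 'append' for increments
          index=False)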