Ejemplo n.º 1
0
# Scoring script: runs the training ETL in PREDICT mode, applies the pickled
# model and exports the churn probability per seller.

# Reuse the training ETL for scoring by copying it into the predict folder.
shutil.copyfile(os.path.join(TRAIN_DIR, 'etl.sql'),
                os.path.join(PREDICT_DIR, 'etl.sql'))
query = utils.import_query(os.path.join(PREDICT_DIR, 'etl.sql'))

print('\nImportando modelo...')
# NOTE(review): unpickling executes arbitrary code; only load model files that
# were produced by this project's own training step.
model = pd.read_pickle(os.path.join(MODELS_DIR, 'models.pkl'))
print('Pronto.')

print('\nAbrindo conexão...')
conn = utils.connect_db('sqlite', path=DB_PATH)
print('Pronto.')

print('\nFazendo ETL...')
query = query.format(date=args.date, stage='PREDICT')
utils.execute_many_sql(query, conn)
df = pd.read_sql_table('PRE_ABT_PREDICT_CHURN', conn)
print('Pronto.')

print('\nRealizando predições...')
# The pickle is a mapping holding the fitted estimator under 'models' and the
# list of input columns under 'features'. Column 1 of predict_proba is kept
# (presumably the positive/churn class - confirm against the training code).
df['churn_prob'] = model['models'].predict_proba(df[model['features']])[:, 1]
print('Pronto.')

print('\nSalvando base escora...')
table = df[['churn_prob', 'seller_id']]
if args.export == 'sqlite':
    # to_sql defaults to if_exists='fail', which raises ValueError as soon as
    # the script is run a second time. Replace the table instead, mirroring
    # the CSV branch (which overwrites its file), and skip the meaningless
    # positional index so both exports carry the same two columns.
    table.to_sql('tb_churn_score', conn, if_exists='replace', index=False)

elif args.export == 'csv':
    table.to_csv(os.path.join(DATA_DIR, 'tb_churn_score.csv'), index=False)
print('Pronto\n')
Ejemplo n.º 2
0
# Maximum number of days available for resale: the span between date_init
# and the requested end date.
days_between = (datetime.datetime.strptime(args.date_end, "%Y-%m-%d")
                - datetime.datetime.strptime(date_init, "%Y-%m-%d")).days

print("\n\n Criando tabela axuliar...")
# Collect every category returned by categories.sql.
categories_query = utils.import_query( os.path.join(TRAIN_DIR, 'categories.sql') )
categories = pd.read_sql_query( categories_query, con )['categories'].tolist()

# Auxiliary table with one row per (day, category) pair, days running from 1
# to days_between within each category. It is built in a single constructor
# call because DataFrame.append was removed in pandas 2.0 (and was quadratic
# when used in a loop). Explicit column names keep the table well-formed even
# when there are no categories or the date span is empty.
df_days = pd.DataFrame(
    [(day, category)
     for category in categories
     for day in range(1, days_between + 1)],
    columns=['days', 'category'])
df_days.to_sql( 'tb_days_between', con, if_exists='replace', index=False )
print("Ok.")

# Modelling query
print("\n\n Executando o ETL...")
query = utils.import_query( os.path.join(TRAIN_DIR, 'lifetime.sql') )  # load the query template
query = query.format( date_init = date_init, date_end = args.date_end )
utils.execute_many_sql( query, con, verbose=True )
print("\n Ok.")

if args.backup:
    # Optional CSV backup named after the extraction window (dashes removed).
    file_name = 'lifetime_{date_init}_{date_end}.csv'.format( date_init=date_init.replace("-",""),
                                                              date_end=args.date_end.replace("-","") )
    df = pd.read_sql_table( 'tb_model_lifetime', con )  # read the table back from the database
    df.to_csv( os.path.join(DATA_DIR, file_name), sep=",", index=False)  # dump it to CSV
Ejemplo n.º 3
0
# Open the SQLite connection shared by every step below.
print("\n Abrindo conexão com banco de dados...")
con = utils.connect_db('sqlite', path=DB_PATH)
print(" Ok.")

print("\n Executando a extração dos dados...")
# Load both SQL templates once: the base-features ETL and the ABT builder.
etl_path = os.path.join(TRAIN_DIR, 'etl.sql')
abt_path = os.path.join(TRAIN_DIR, 'make_abt.sql')
query_etl_base = utils.import_query(etl_path)
query_abt_base = utils.import_query(abt_path)

# For each reference date: render both templates, run the ETL, then read the
# ABT produced for that date.
dfs = []
for d in dates:
    query_etl, query_abt = (query_etl_base.format(date=d, stage="TRAIN"),
                            query_abt_base.format(date=d))
    utils.execute_many_sql(query_etl, con)
    snapshot = pd.read_sql_query(query_abt, con)
    dfs.append(snapshot)

# Stack the per-date frames row-wise under a fresh 0..n-1 index.
df = pd.concat(dfs, ignore_index=True)
print(" Ok.")

if args.save_db:
    print("\n Salvando dados em Banco de dados...")
    # Table name carries the extraction window with the dashes stripped.
    start_tag = args.date_init.replace("-", "")
    end_tag = args.date_end.replace("-", "")
    table_name = f'tb_abt_{start_tag}_{end_tag}'
    df.to_sql(table_name, con, if_exists='replace', index=False)
    print(" Ok.")

if args.save_file:
    print("\n Salvando dados em arquivo...")
Ejemplo n.º 4
0
                    choices=['sqlite', 'sql'],
                    default='sqlite')
args = parser.parse_args()

# Extraction window: from date_end minus relativedelta(years=1) up to date_end.
# Parsing with strptime also rejects any --date_end that is not YYYY-MM-DD
# before the value is interpolated into SQL below.
date_end = args.date_end
date_init = datetime.datetime.strptime(
    args.date_end, "%Y-%m-%d") - dateutils.relativedelta(years=1)
date_init = date_init.strftime("%Y-%m-%d")

# Load the segmentation query and fill in the window.
query = utils.import_query(os.path.join(DATA_PREP_DIR, 'segmentos.sql'))
query = query.format(date_init=date_init, date_end=date_end)

if args.database == 'sqlite':
    # For SQLite the 'olist.' prefix is stripped from the query and the
    # target table name is left unqualified.
    query = query.replace('olist.', '')
    table_name = 'tb_seller_sgmt'

elif args.database == 'sql':
    table_name = 'olist.tb_seller_sgmt'

# Open the database connection.
conn = utils.connect_db(args.database, path=DB_PATH)

try:
    # Try to materialize the query result as a new table.
    create_query = f'''CREATE TABLE {table_name} AS\n{query};'''
    utils.execute_many_sql(create_query, conn)

except Exception:
    # Creation failed - presumably because the table already exists - so
    # refresh the rows for this date instead. Catching Exception rather than
    # using a bare `except:` lets KeyboardInterrupt/SystemExit propagate, so
    # interrupting the CREATE no longer triggers a DELETE + INSERT.
    insert_query = f'''DELETE FROM {table_name} WHERE DT_SGMT = '{date_end}';
    INSERT INTO {table_name} {query};'''
    utils.execute_many_sql(insert_query, conn, verbose=True)
Ejemplo n.º 5
0
# Project layout, resolved by walking up from PREDICT_DIR; the database lives
# in a sibling 'upload_olist' folder next to BASE_DIR.
DATA_PREP_DIR = os.path.dirname(PREDICT_DIR)
SRC_DIR = os.path.dirname(DATA_PREP_DIR)
BASE_DIR = os.path.dirname(SRC_DIR)
DATA_DIR = os.path.join(BASE_DIR, 'data')
DB_PATH = os.path.join(os.path.dirname(BASE_DIR), 'upload_olist', 'data',
                       'olist.db')

# Start of the extraction window: date_end minus `args.delta` months.
date_init = datetime.datetime.strptime(
    args.date_end, "%Y-%m-%d") - dateutils.relativedelta(months=args.delta)
date_init = date_init.strftime("%Y-%m-%d")

# Open the database connection.
con = utils.connect_db('sqlite', path=DB_PATH)

# Load the scoring query and fill in the window.
query = utils.import_query(os.path.join(PREDICT_DIR, 'make_score.sql'))
query = query.format(date_init=date_init, date_end=args.date_end)

try:
    # Try to create the table from the scoring query.
    print("\n\n Criando tabela...")
    create = utils.import_query(os.path.join(PREDICT_DIR, 'create.sql'))
    create = create.format(query=query)
    utils.execute_many_sql(create, con, verbose=True)
    print(" Ok.")
except Exception:
    # Creation failed - presumably because the table already exists - so fall
    # back to the insert script. Catching Exception rather than using a bare
    # `except:` lets KeyboardInterrupt/SystemExit propagate, so interrupting
    # the create step no longer kicks off an insert.
    print("\n\n Inserindo dados na tabela...")
    insert = utils.import_query(os.path.join(PREDICT_DIR, 'insert.sql'))
    insert = insert.format(query=query, date=args.date_end)
    utils.execute_many_sql(insert, con, verbose=True)
    print(" Ok.")