# --- Predict script: score sellers for churn and persist the results. ---
# Assumes args (date, export), TRAIN_DIR, PREDICT_DIR, MODELS_DIR, DATA_DIR,
# DB_PATH and utils are defined earlier in the file.

# Reuse the training ETL query for the predict stage.
shutil.copyfile(os.path.join(TRAIN_DIR, 'etl.sql'),
                os.path.join(PREDICT_DIR, 'etl.sql'))
query = utils.import_query(os.path.join(PREDICT_DIR, 'etl.sql'))

print('\nImportando modelo...')
# Pickled dict; 'models' is the fitted estimator, 'features' its input columns.
model = pd.read_pickle(os.path.join(MODELS_DIR, 'models.pkl'))
print('Pronto.')

print('\nAbrindo conexão...')
conn = utils.connect_db('sqlite', path=DB_PATH)
print('Pronto.')

print('\nFazendo ETL...')
query = query.format(date=args.date, stage='PREDICT')
utils.execute_many_sql(query, conn)
df = pd.read_sql_table('PRE_ABT_PREDICT_CHURN', conn)
print('Pronto.')

print('\nRealizando predições...')
# Probability of the positive (churn) class.
df['churn_prob'] = model['models'].predict_proba(df[model['features']])[:, 1]
print('Pronto.')

print('\nSalvando base escora...')
table = df[['churn_prob', 'seller_id']]
if args.export == 'sqlite':
    # FIX: original call passed no if_exists/index arguments, so a second run
    # raised ValueError ("table ... already exists") and the DataFrame index
    # was persisted as a spurious column. Append scores and drop the index.
    table.to_sql('tb_churn_score', conn, if_exists='append', index=False)
elif args.export == 'csv':
    table.to_csv(os.path.join(DATA_DIR, 'tb_churn_score.csv'), index=False)
print('Pronto\n')
# Maximum number of days available for resale between the two dates.
days_between = (
    datetime.datetime.strptime(args.date_end, "%Y-%m-%d")
    - datetime.datetime.strptime(date_init, "%Y-%m-%d")
).days

print("\n\n Criando tabela axuliar...")
# Enumerate every possible category so the helper table covers all of them.
categories_query = utils.import_query(
    os.path.join(TRAIN_DIR, 'categories.sql')
)
categories = pd.read_sql_query(categories_query, con)['categories'].tolist()

# Helper table with one row per (day, category) pair.
# FIX: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# the per-category append loop re-copied the frame each iteration (quadratic).
# Build the whole frame in a single constructor call instead.
df_days = pd.DataFrame(
    [(day, cat) for cat in categories for day in range(1, days_between + 1)],
    columns=['days', 'category'],
)
df_days.to_sql('tb_days_between', con, if_exists='replace', index=False)
print("Ok.")

# Modelling query.
print("\n\n Executando o ETL...")
query = utils.import_query(os.path.join(TRAIN_DIR, 'lifetime.sql'))
query = query.format(date_init=date_init, date_end=args.date_end)
utils.execute_many_sql(query, con, verbose=True)
print("\n Ok.")

if args.backup:
    # Snapshot the modelled table to CSV, named after the date window.
    file_name = 'lifetime_{date_init}_{date_end}.csv'.format(
        date_init=date_init.replace("-", ""),
        date_end=args.date_end.replace("-", ""))
    df = pd.read_sql_table('tb_model_lifetime', con)
    df.to_csv(os.path.join(DATA_DIR, file_name), sep=",", index=False)
print("\n Abrindo conexão com banco de dados...")
con = utils.connect_db('sqlite', path=DB_PATH)
print(" Ok.")

print("\n Executando a extração dos dados...")
# Query templates: the per-date ETL and the ABT builder.
etl_template = utils.import_query(os.path.join(TRAIN_DIR, 'etl.sql'))
abt_template = utils.import_query(os.path.join(TRAIN_DIR, 'make_abt.sql'))


def _abt_for_date(ref_date):
    """Run the ETL for one reference date, then return its ABT slice."""
    utils.execute_many_sql(etl_template.format(date=ref_date, stage="TRAIN"), con)
    return pd.read_sql_query(abt_template.format(date=ref_date), con)


# One ABT slice per reference date, stacked into a single training frame.
df = pd.concat([_abt_for_date(d) for d in dates], axis=0, ignore_index=True)
print(" Ok.")

if args.save_db:
    print("\n Salvando dados em Banco de dados...")
    table_name = 'tb_abt_{date_init}_{date_end}'.format(
        date_init=args.date_init.replace("-", ""),
        date_end=args.date_end.replace("-", ""))
    df.to_sql(table_name, con, index=False, if_exists='replace')
    print(" Ok.")

if args.save_file:
    print("\n Salvando dados em arquivo...")
choices=['sqlite', 'sql'], default='sqlite') args = parser.parse_args() date_end = args.date_end date_init = datetime.datetime.strptime( args.date_end, "%Y-%m-%d") - dateutils.relativedelta(years=1) date_init = date_init.strftime("%Y-%m-%d") # Importa a query query = utils.import_query(os.path.join(DATA_PREP_DIR, 'segmentos.sql')) query = query.format(date_init=date_init, date_end=date_end) if args.database == 'sqlite': query = query.replace('olist.', '') table_name = 'tb_seller_sgmt' elif args.database == 'sql': table_name = 'olist.tb_seller_sgmt' # Abrindo conexão com banco... conn = utils.connect_db(args.database, path=DB_PATH) try: create_query = f'''CREATE TABLE {table_name} AS\n{query};''' utils.execute_many_sql(create_query, conn) except: insert_query = f'''DELETE FROM {table_name} WHERE DT_SGMT = '{date_end}'; INSERT INTO {table_name} {query};''' utils.execute_many_sql(insert_query, conn, verbose=True)
# Project directory layout, derived from PREDICT_DIR (defined earlier).
DATA_PREP_DIR = os.path.dirname(PREDICT_DIR)
SRC_DIR = os.path.dirname(DATA_PREP_DIR)
BASE_DIR = os.path.dirname(SRC_DIR)
DATA_DIR = os.path.join(BASE_DIR, 'data')
DB_PATH = os.path.join(os.path.dirname(BASE_DIR), 'upload_olist', 'data', 'olist.db')

# Extraction window starts args.delta months before the end date.
date_init = datetime.datetime.strptime(
    args.date_end, "%Y-%m-%d") - dateutils.relativedelta(months=args.delta)
date_init = date_init.strftime("%Y-%m-%d")

# Open the database connection.
con = utils.connect_db('sqlite', path=DB_PATH)

# Import and parameterize the scoring query.
query = utils.import_query(os.path.join(PREDICT_DIR, 'make_score.sql'))
query = query.format(date_init=date_init, date_end=args.date_end)

try:
    # First run: create the score table from scratch.
    print("\n\n Criando tabela...")
    create = utils.import_query(os.path.join(PREDICT_DIR, 'create.sql'))
    create = create.format(query=query)
    utils.execute_many_sql(create, con, verbose=True)
    print(" Ok.")
except Exception:
    # FIX: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt. The expected failure here is "table already exists",
    # so fall back to inserting this run's rows.
    print("\n\n Inserindo dados na tabela...")
    insert = utils.import_query(os.path.join(PREDICT_DIR, 'insert.sql'))
    insert = insert.format(query=query, date=args.date_end)
    utils.execute_many_sql(insert, con, verbose=True)
    print(" Ok.")