def model_evaluation(df, models, best_models, running_times, datasets, number_of_features, options_file, oversample_check, proj_id):
    """Section D: evaluate the trained classifiers, pick the best one and report it.

    Returns (model_choice_message, best_model_dataframe_or_None, row_count).
    """
    performance_info_append(time.time(), 'Section_D_Start')
    log_record('Início Secção D...', proj_id)
    # Per-model performance metrics (train/test) — metrics are explained in the provided pdf.
    results_training, results_test, predictions = performance_evaluation_classification(
        models, best_models, running_times, datasets, options_file, proj_id)
    plot_roc_curve(best_models, models, datasets,
                   'roc_curve_temp_' + str(number_of_features))
    df_model_dict = multiprocess_model_evaluation(
        df, models, datasets, best_models, predictions,
        configuration_parameters, oversample_check, proj_id)
    model_choice_message, best_model_name, _, section_e_upload_flag = model_choice(
        options_file.DSN_MLG_PRD, options_file, results_test)
    best_model = None
    if section_e_upload_flag:
        # Only compute feature contributions when a model was actually chosen.
        best_model = df_model_dict[best_model_name]
        feature_contribution(best_model, configuration_parameters, 'Modelo',
                             options_file, proj_id)
    log_record('Fim Secção D.', proj_id)
    performance_info_append(time.time(), 'Section_D_End')
    return model_choice_message, best_model, df.shape[0]
def main():
    """Entry point for the Baviera vehicle order-suggestion pipeline."""
    log_record('Projeto: Sugestão Encomenda Baviera - Viaturas', project_id)
    # --- Options ---------------------------------------------------------
    input_file = 'dbs/' + 'full_data_bmw.csv'
    # possible targets = ['stock_class1', 'stock_class2', 'margem_class1', 'score_class', 'new_score']
    target_variable = ['new_score']
    oversample_check = 0
    query_filters = {'NLR_CODE': '701'}
    number_of_features = 'all'
    # ---------------------------------------------------------------------
    df = data_acquisition(input_file, query_filters, local=0)
    control_prints(df, 'after A')
    df = data_processing(df, target_variable, oversample_check, number_of_features)
    control_prints(df, 'after B')
    model_choice_message, best_model, vehicle_count = data_grouping_by_locals_temp(
        df, configuration_parameters, level_2_optionals_baviera_options.project_id)
    deployment(best_model,
               level_2_optionals_baviera_options.sql_info['database'],
               level_2_optionals_baviera_options.sql_info['final_table'])
    performance_info(level_2_optionals_baviera_options.project_id,
                     level_2_optionals_baviera_options,
                     model_choice_message, vehicle_count)
    log_record('Conclusão com sucesso - Projeto {}.\n'.format(project_dict[project_id]),
               project_id)
def deployment(df, db, view):
    """Section E: upload the CDSU results into `db`.`view`, replacing the NLR slice."""
    performance_info_append(time.time(), 'Section_E_Start')
    log_record('Início Secção E...', project_id)
    opts = level_2_optionals_cdsu_options  # local alias for readability
    if df is not None:
        df['NLR_Code'] = opts.nlr_code
        df = df.rename(columns=opts.column_sql_renaming)
        control_prints(df, 'before deployment, after renaming', head=1)
        # Delete only this NLR's rows before injecting the refreshed data.
        sql_delete(opts.DSN_MLG_PRD, db, view, opts,
                   {'NLR_Code': '{}'.format(opts.nlr_code)})
        sql_inject(df, opts.DSN_MLG_PRD, db, view, opts,
                   list(opts.column_checkpoint_sql_renaming.values()),
                   check_date=1)
    log_record('Fim Secção E.', project_id)
    performance_info_append(time.time(), 'Section_E_End')
    return
def data_acquisition(input_files, query_filters, local=0):
    """Section A: load facts, durations, clients, PBI categories, manual
    classifications and keywords, either from local CSVs or from the DW.

    Returns the six dataframes plus the keyword and ranking mappings.
    """
    performance_info_append(time.time(), 'Section_A_Start')
    df_facts = pd.DataFrame()
    df_facts_duration = pd.DataFrame()
    df_clients = pd.DataFrame()
    df_pbi_categories = pd.DataFrame()
    df_manual_classifications = pd.DataFrame()
    keywords_df = pd.DataFrame()
    log_record('Início Secção A...', options_file.project_id)
    if local:
        df_facts = read_csv(input_files[0], index_col=0,
                            parse_dates=options_file.date_columns,
                            infer_datetime_format=True)
        df_facts_duration = read_csv(input_files[1], index_col=0)
        df_clients = read_csv(input_files[2], index_col=0)
        df_pbi_categories = read_csv(input_files[3], index_col=0)
        df_manual_classifications = read_csv(input_files[4], index_col=0)
        keywords_df = read_csv(input_files[5], index_col=0)
    else:
        df_facts = sql_retrieve_df(options_file.DSN_SRV3_PRD,
                                   options_file.sql_info['database_source'],
                                   options_file.sql_info['initial_table_facts'],
                                   options_file, options_file.sql_fact_columns,
                                   query_filters=query_filters[0],
                                   parse_dates=options_file.date_columns)
        df_facts_duration = sql_retrieve_df(options_file.DSN_SRV3_PRD,
                                            options_file.sql_info['database_source'],
                                            options_file.sql_info['initial_table_facts_durations'],
                                            options_file, options_file.sql_facts_durations_columns,
                                            query_filters=query_filters[1])
        df_clients = sql_retrieve_df(options_file.DSN_SRV3_PRD,
                                     options_file.sql_info['database_source'],
                                     options_file.sql_info['initial_table_clients'],
                                     options_file, options_file.sql_dim_contacts_columns)
        df_pbi_categories = sql_retrieve_df(options_file.DSN_SRV3_PRD,
                                            options_file.sql_info['database_source'],
                                            options_file.sql_info['initial_table_pbi_categories'],
                                            options_file, options_file.sql_pbi_categories_columns,
                                            query_filters=query_filters[1])
        df_manual_classifications = sql_retrieve_df(options_file.DSN_SRV3_PRD,
                                                    options_file.sql_info['database_source'],
                                                    options_file.sql_info['aux_table'],
                                                    options_file)
        keywords_df = sql_retrieve_df(options_file.DSN_MLG_PRD,
                                      options_file.sql_info['database_final'],
                                      options_file.sql_info['keywords_table_str'],
                                      options_file,
                                      columns=['Keywords_PT', 'Keywords_ES']).dropna()
        # Cache the freshly retrieved data locally for later `local=1` runs.
        save_csv([df_facts, df_facts_duration, df_clients, df_pbi_categories,
                  df_manual_classifications, keywords_df],
                 ['dbs/db_facts_initial', 'dbs/db_facts_duration',
                  'dbs/db_clients_initial', 'dbs/db_pbi_categories_initial',
                  'dbs/db_manual_classification', 'dbs/db_keywords_df'])
    keyword_dict, ranking_dict = sql_mapping_retrieval(options_file.DSN_MLG_PRD,
                                                       options_file.sql_info['database_final'],
                                                       options_file.sql_info['keywords_table'],
                                                       'Keyword_Group', options_file,
                                                       multiple_columns=1)
    keyword_dict = keyword_dict[0]
    log_record('Fim Secção A...', options_file.project_id)
    performance_info_append(time.time(), 'Section_A_End')
    return df_facts, df_facts_duration, df_clients, df_pbi_categories, df_manual_classifications, keywords_df, keyword_dict, ranking_dict
def multiprocess_evaluation(args):
    """Worker for the multiprocess pool: score one model and persist its output.

    `args` is a single tuple so the function can be mapped by a process pool.
    Returns (model_name, scored_dataframe).
    """
    (df, model_name, datasets, best_models, predictions,
     configuration_parameters, oversample_check, project_id) = args
    # Human-readable model name, looked up once for both log messages.
    display_name = level_0_performance_report.dict_models_name_conversion[model_name][0]
    level_0_performance_report.log_record(
        'A avaliar o modelo {} @ {}...'.format(display_name, time.strftime("%H:%M:%S @ %d/%m/%y")),
        project_id)
    # Deep copies so per-model evaluation cannot mutate the shared datasets.
    train_x_copy = datasets['train_x'].copy(deep=True)
    test_x_copy = datasets['test_x'].copy(deep=True)
    probabilities = probability_evaluation(model_name, best_models, train_x_copy, test_x_copy)
    df_model = add_new_columns_to_df(df, probabilities, predictions[model_name],
                                     datasets, configuration_parameters,
                                     oversample_check, project_id)
    df_model = df_decimal_places_rounding(df_model, {'proba_0': 2, 'proba_1': 2})
    level_1_e_deployment.save_csv(
        [df_model],
        [base_path + '/output/' + 'db_final_classification_' + model_name])
    level_0_performance_report.log_record(
        'Modelo {} terminou @ {}'.format(display_name, time.strftime("%H:%M:%S @ %d/%m/%y")),
        project_id)
    return model_name, df_model
def flow_step_2(df):
    """Step 2: drop duplicate part rows, keyed on the full descriptive tuple."""
    log_record('Step 2 started.', options_file.project_id)
    dedup_columns = ['Part_Ref', 'Part_Desc', 'Product_Group_DW', 'Client_Id',
                     'Average_Cost', 'PVP_1', 'PLR_Account', 'Part_Desc_PT']
    df = df.drop_duplicates(subset=dedup_columns)
    log_record('Step 2 ended.', options_file.project_id)
    return df
def sql_retrieve_df_specified_query(dsn, db, options_file, query):
    """Run `query` against database `db` on `dsn` and return a DataFrame.

    Returns None (after logging) on pyodbc programming/operational errors.
    Raises ValueError when OS_PLATFORM is neither 'WINDOWS' nor 'LINUX'
    (previously this path crashed with an unbound-name NameError).
    The connection is now closed in a finally block, so it is no longer
    leaked when pd.read_sql raises.
    """
    start = time.time()
    cnxn = None
    try:
        if OS_PLATFORM == 'WINDOWS':
            cnxn = pyodbc.connect('DSN={};UID={};PWD={};DATABASE={}'.format(
                dsn, options_file.UID, options_file.PWD, db), searchescape='\\')
        elif OS_PLATFORM == 'LINUX':
            cnxn = pyodbc.connect(
                'Driver=ODBC Driver 17 for SQL Server;Server=tcp:' + str(dsn) +
                ';UID=' + str(options_file.UID) + ';PWD=' + str(options_file.PWD) +
                ';DATABASE=' + str(db), searchescape='\\')
        else:
            raise ValueError('Unsupported OS_PLATFORM: {}'.format(OS_PLATFORM))
        df = pd.read_sql(query, cnxn)
        print('Elapsed time: {:.2f} seconds.'.format(time.time() - start))
        return df
    except (pyodbc.ProgrammingError, pyodbc.OperationalError) as error:
        level_0_performance_report.log_record(
            'Erro ao obter os dados do DW - {}'.format(error),
            options_file.project_id, flag=2)
        return
    finally:
        # Always release the ODBC connection, even when the query fails.
        if cnxn is not None:
            cnxn.close()
def model_training(df):
    """Retrain the service-desk classifier and fold its labels back into df."""
    # NOTE(review): the tag says 'Section_E_Start' while the logs say 'Secção D',
    # and no matching *_End is appended — confirm whether this is intentional.
    performance_info_append(time.time(), 'Section_E_Start')
    log_record('Início Secção D...', options_file.project_id)
    scored_df = pa_servicedesk_models_training.main()
    df = update_labels(df, scored_df, 'Request_Num', 'Label')
    log_record('Fim Secção D.', options_file.project_id)
    return df
def sql_retrieve_df(dsn, db, view, options_file, columns='*', query_filters=0, column_renaming=0, **kwargs):
    """Build and run a SELECT against `view` in database `db`, returning a DataFrame.

    columns: '*' or a list of column names; query_filters: 0 (no WHERE) or a dict
    mapping column -> value or list of values (joined with AND / IN).
    Extra **kwargs are forwarded to pd.read_sql. Returns None (after logging)
    on pyodbc programming/operational errors.

    Fixes: the ODBC connection is now closed in a finally block (it leaked when
    pd.read_sql raised); an unknown OS_PLATFORM or unsupported filter type now
    raises ValueError instead of crashing with NameError / a None query.
    """
    start = time.time()
    query, query_filters_string_list = None, []
    if columns != '*':
        # list -> comma-separated column list without quotes.
        columns = str(columns)[1:-1].replace('\'', '')
    cnxn = None
    try:
        if OS_PLATFORM == 'WINDOWS':
            cnxn = pyodbc.connect('DSN={};UID={};PWD={};DATABASE={}'.format(
                dsn, options_file.UID, options_file.PWD, db), searchescape='\\')
        elif OS_PLATFORM == 'LINUX':
            cnxn = pyodbc.connect(
                'Driver=ODBC Driver 17 for SQL Server;Server=tcp:' + str(dsn) +
                ';UID=' + str(options_file.UID) + ';PWD=' + str(options_file.PWD) +
                ';DATABASE=' + str(db), searchescape='\\')
        else:
            raise ValueError('Unsupported OS_PLATFORM: {}'.format(OS_PLATFORM))
        if not query_filters:
            query = 'SELECT ' + columns + ' FROM ' + view + ' WITH (NOLOCK)'
        elif isinstance(query_filters, dict):
            for key in query_filters:
                if isinstance(query_filters[key], list):
                    testing_string = '\'%s\'' % "\', \'".join(
                        [str(x) for x in query_filters[key]])
                    query_filters_string_list.append(key + ' in (' + testing_string + ')')
                else:
                    query_filters_string_list.append(
                        key + ' = \'%s\'' % str(query_filters[key]))
            query = 'SELECT ' + columns + ' FROM ' + view + ' WITH (NOLOCK) WHERE ' + \
                ' and '.join(query_filters_string_list)
        else:
            raise ValueError('query_filters must be falsy or a dict, got {}'.format(type(query_filters)))
        df = pd.read_sql(query, cnxn, **kwargs)
        if column_renaming:
            # NOTE(review): the renamed frame is not reassigned — presumably
            # column_rename mutates df in place; confirm before changing.
            level_1_b_data_processing.column_rename(
                df, list(options_file.sql_to_code_renaming.keys()),
                list(options_file.sql_to_code_renaming.values()))
        print('Elapsed time: {:.2f} seconds.'.format(time.time() - start))
        return df
    except (pyodbc.ProgrammingError, pyodbc.OperationalError) as error:
        level_0_performance_report.log_record(
            'Erro ao obter os dados do DW - {}'.format(error),
            options_file.project_id, flag=2)
        return
    finally:
        if cnxn is not None:
            cnxn.close()
def data_modelling(df, df_top_words, df_manual_classification, keyword_dict, ranking_dict):
    """Section C: derive the new request type for every record in df."""
    performance_info_append(time.time(), 'Section_C_Start')
    log_record('Início Secção C...', options_file.project_id)
    df = new_request_type(df, df_top_words, df_manual_classification,
                          keyword_dict, ranking_dict, options_file)
    log_record('Fim Secção C.', options_file.project_id)
    performance_info_append(time.time(), 'Section_C_End')
    return df
def data_acquisition():
    """Section A: pull sales, stock, product, customer-group and dealer data
    from the DW and enrich sales with customer/dealer information."""
    performance_info_append(time.time(), 'Section_A_Start')
    log_record('Início Secção A...', options_file.project_id)
    current_date, _ = time_tags()
    queries = [options_file.sales_query, options_file.stock_query,
               options_file.product_db_query, options_file.customer_group_query,
               options_file.dealers_query]
    dfs = [sql_retrieve_df_specified_query(options_file.DSN_SRV3_PRD,
                                           options_file.sql_info['database_source'],
                                           options_file, query)
           for query in queries]
    df_sales, df_stock, df_pdb, df_customers, df_dealers = dfs
    # There are repeated VehicleData_Code inside this union between BI_DTR and BI_DW_History.
    df_pdb.drop_duplicates(subset='VehicleData_Code', inplace=True)
    # NOTE(review): errors='ignore' is deprecated in recent pandas — confirm target version.
    df_sales['NLR_Code'] = pd.to_numeric(df_sales['NLR_Code'], errors='ignore')
    # Adding missing information regarding customers (mutates df_sales in place).
    missing_customer_info_treatment(df_sales)
    # Addition of customer information.
    df_customers_and_dealers = df_join_function(
        df_dealers,
        df_customers[['Customer_Group_Code', 'Customer_Group_Desc']].set_index('Customer_Group_Code'),
        on='Customer_Group_Code', how='left')
    df_sales = df_join_function(
        df_sales,
        df_customers_and_dealers[[
            'SLR_Account_CHS_Key', 'NDB_VATGroup_Desc', 'VAT_Number_Display',
            'NDB_Contract_Dealer_Desc', 'NDB_VHE_PerformGroup_Desc',
            'NDB_VHE_Team_Desc', 'Customer_Display', 'Customer_Group_Code',
            'Customer_Group_Desc', 'NDB_Dealer_Code'
        ]].set_index('SLR_Account_CHS_Key'),
        on='SLR_Account_CHS_Key', how='left')
    log_record('Fim Secção A.', options_file.project_id)
    performance_info_append(time.time(), 'Section_A_End')
    return df_sales, df_stock, df_pdb, df_customers, df_dealers
def main():
    """Pipeline entry point: acquire, model and deploy probability/cost data."""
    log_record('Projeto: {}'.format(project_dict[options_file.project_id]),
               options_file.project_id)
    df = data_acquisition()
    df_prob_cost = data_modelling(df)
    deployment(df_prob_cost, options_file.DATA_PROB_PATH_ALL_COST)
    return
def deployment(df):
    """Section E: truncate-and-load the classified requests into the final table."""
    performance_info_append(time.time(), 'Section_E_Start')
    log_record('Início Secção E...', options_file.project_id)
    # Convert NaN/NaT to None so the SQL driver receives NULLs.
    df = df.astype(object).where(pd.notnull(df), None)
    upload_columns = ['Request_Num', 'StemmedDescription', 'Description',
                      'Language', 'Open_Date', 'Label', 'Classification_Flag']
    sql_inject(df, options_file.DSN_SRV3_PRD,
               options_file.sql_info['database_source'],
               options_file.sql_info['final_table'],
               options_file, upload_columns, truncate=1)
    log_record('Fim Secção E.', options_file.project_id)
    performance_info_append(time.time(), 'Section_E_End')
    return
def flow_step_4_5(df, keywords_df):
    """Step 4.5: flag rows whose concatenated descriptions contain known keywords."""
    log_record('Step 4.5 started', options_file.project_id)
    desc_keywords = list(set(keywords_df.loc[keywords_df['Column'] == 'Part_Desc', 'Word']))
    desc_pt_keywords = list(set(keywords_df.loc[keywords_df['Column'] == 'Part_Desc_PT', 'Word']))
    # Detects keywords in each description column.
    df = detect_keywords(df, 'Part_Desc_concat', desc_keywords)
    df = detect_keywords(df, 'Part_Desc_PT_concat', desc_pt_keywords)
    log_record('Step 4.5 ended.', options_file.project_id)
    return df
def data_acquisition(input_file, query_filters, local=0):
    """Section A: load the Baviera vehicle dataset, locally or from the DW.

    local=1 reads `input_file` (utf-8 with a latin-1 fallback); otherwise the
    data is fetched via sql_retrieve_df with `query_filters`.
    The duplicated utf-8/latin-1 read+rename code is now a single helper.
    """
    performance_info_append(time.time(), 'Section_A_Start')
    log_record('Início Secção A...', project_id)
    if local:
        try:
            df = _read_local_csv(input_file, 'utf-8')
        except UnicodeDecodeError:
            # Some exports are latin-1 encoded; retry with that codec.
            df = _read_local_csv(input_file, 'latin-1')
    else:
        df = sql_retrieve_df(
            level_2_optionals_baviera_options.DSN_MLG_PRD,
            level_2_optionals_baviera_options.sql_info['database'],
            level_2_optionals_baviera_options.sql_info['initial_table'],
            level_2_optionals_baviera_options,
            list(level_2_optionals_baviera_options.sql_to_code_renaming.keys()),
            query_filters, column_renaming=1,
            parse_dates=['Purchase_Date', 'Sell_Date'])
    project_units_count_checkup(df, 'Nº Stock',
                                level_2_optionals_baviera_options, sql_check=0)
    log_record('Fim Secção A.', project_id)
    performance_info_append(time.time(), 'Section_A_End')
    return df


def _read_local_csv(input_file, encoding):
    # Read the local CSV with the given encoding and rename SQL columns to code names.
    df = read_csv(input_file, encoding=encoding,
                  parse_dates=['Purchase_Date', 'Sell_Date'],
                  usecols=level_2_optionals_baviera_options.sql_to_code_renaming.keys(),
                  infer_datetime_format=True, decimal='.')
    # NOTE(review): result not reassigned in the original either — presumably
    # column_rename mutates df in place; confirm before changing.
    column_rename(
        df,
        list(level_2_optionals_baviera_options.sql_to_code_renaming.keys()),
        list(level_2_optionals_baviera_options.sql_to_code_renaming.values()))
    return df
def nan_detection(df, original_col, new_col):
    """Warn (flag=1) when the merged column `new_col` contains NaNs,
    listing the unmapped `original_col` values and affected vehicle stock numbers."""
    rows_missing_mapping = df[df[new_col].isnull()]
    if rows_missing_mapping.shape[0]:
        log_record(
            'Aviso no Merge de Colunas - NaNs detetados em: {}, não encontrada a parametrização para {} nos veículos(s) com VHE_Number(s): {}'
            .format(new_col,
                    rows_missing_mapping[original_col].unique(),
                    rows_missing_mapping['Nº Stock'].unique()),
            project_id, flag=1)
    return
def data_processing(df_sales, pse_code, options_info):
    """Section B: clean the APV sales dataset for one PSE code."""
    performance_info_append(time.time(), 'Section_B_Start')
    log_record('Início Secção B...', options_file.project_id)
    t0 = time.time()
    df_sales = apv_dataset_treatment(df_sales, pse_code,
                                     options_info.urgent_purchases_flags,
                                     options_info.project_id)
    print('Elapsed time: {:.2f} seconds.'.format(time.time() - t0))
    log_record('Fim Secção B.', options_file.project_id)
    performance_info_append(time.time(), 'Section_B_End')
    return df_sales
def data_acquisition(options_info, pse_group, last_processed_date, current_date):
    """Section A: retrieve sales, history and product-group data for a PSE group."""
    performance_info_append(time.time(), 'Section_A_Start')
    log_record('Início Secção A...', options_file.project_id)
    t0 = time.time()
    df_sales, df_history, df_product_group_dw = dw_data_retrieval(
        pse_group, current_date, options_info, last_processed_date)
    print('Elapsed time: {:.2f} seconds.'.format(time.time() - t0))
    log_record('Fim Secção A.', options_file.project_id)
    performance_info_append(time.time(), 'Section_A_End')
    return df_sales, df_product_group_dw, df_history
def data_acquisition():
    """Section A: fetch the training dataset via the configured SQL query."""
    performance_info_append(time.time(), 'Section_A_Start')
    log_record('Início Secção A...', project_id)
    df = sql_retrieve_df_specified_query(
        options_file.DSN_MLG_PRD,
        options_file.sql_info['database_mlg'],
        options_file,
        options_file.get_train_dataset_query)
    log_record('Fim Secção A.', project_id)
    performance_info_append(time.time(), 'Section_A_End')
    return df
def data_modelling(datasets, datasets_non_ohe, models):
    """Section C: train the regression models (OHE and non-OHE variants) and save them."""
    performance_info_append(time.time(), 'Section_C_Start')
    log_record('Início Secção C...', options_file.project_id)
    best_models, running_times = regression_model_training(
        models,
        datasets['train_x'], datasets_non_ohe['train_x'],
        datasets['train_y'], datasets_non_ohe['train_y'],
        options_file.regression_models_standard,
        options_file.k,
        options_file.gridsearch_score,
        options_file.project_id)
    save_model(best_models, models, options_file.project_id)
    log_record('Fim Secção C.', options_file.project_id)
    performance_info_append(time.time(), 'Section_C_End')
    return best_models, running_times
def flow_step_5(df, manual_classified_files_loc):
    """Step 5: merge manual/application classifications and apply the corrections."""
    log_record('Step 5 started.', options_file.project_id)
    # Step 0 - merge all manual/application classifications into one frame.
    manual_families = get_dgo_manual_classifications(manual_classified_files_loc)
    application_refs = get_fact_pa_classifications()
    all_classifications = pd.concat([manual_families, application_refs])
    # Step 1 - apply the corrections and normalize the DW product groups.
    df = classification_corrections_start(df, all_classifications)
    df = product_group_dw_complete_replaces(df.copy())
    log_record('Step 5 ended.', options_file.project_id)
    return df, all_classifications
def main():
    """Service-desk pipeline: acquire, process, model, deploy, retrain, redeploy."""
    log_record('Projeto: {}'.format(project_dict[options_file.project_id]),
               options_file.project_id)
    local_input_files = ['dbs/db_facts_initial.csv', 'dbs/db_facts_duration.csv',
                         'dbs/db_clients_initial.csv', 'dbs/db_pbi_categories_initial.csv',
                         'dbs/db_manual_classification.csv', 'dbs/db_keywords_df.csv']
    query_filters = [{'Cost_Centre': '6825', 'Record_Type': ['1', '2']},
                     {'Cost_Centre': '6825'}]
    (df_facts, df_facts_duration, df_clients, df_pbi_categories,
     df_manual_classifications, keywords_df, keyword_dict, ranking_dict) = data_acquisition(
        local_input_files, query_filters, local=0)
    df, df_top_words = data_processing(df_facts, df_facts_duration, df_clients,
                                       df_pbi_categories, keywords_df)
    df = data_modelling(df, df_top_words, df_manual_classifications,
                        keyword_dict, ranking_dict)
    df['Classification_Flag'] = np.where(df['Label'] == 'Não Definido', 1, 0)
    deployment(df)  # Deploys all according to current classification
    df = model_training(df)
    deployment(df)
    performance_info(options_file.project_id, options_file, model_choice_message='N/A')
    log_record('Conclusão com sucesso - Projeto: {}'.format(project_dict[options_file.project_id]),
               options_file.project_id)
def deployment(df, db, view):
    """Section E: upload the Baviera results into `db`.`view`, replacing the NLR slice.

    Bug fix: the per-column string casts and the NLR_Code assignment previously
    ran BEFORE the `df is not None` guard and crashed when df was None (the
    sibling deployments guard first). The identical sql_delete call duplicated
    in both branches is now issued once; only the injected column list depends
    on `model_training_check` (module-level flag).
    """
    performance_info_append(time.time(), 'Section_E_Start')
    log_record('Início Secção E...', project_id)
    if df is not None:
        for col in list(df):
            df[col] = df[col].astype(str)
        df['NLR_Code'] = level_2_optionals_baviera_options.nlr_code
        df = column_rename(
            df,
            list(level_2_optionals_baviera_options.column_sql_renaming.keys()),
            list(level_2_optionals_baviera_options.column_sql_renaming.values()))
        if model_training_check:
            upload_columns = level_2_optionals_baviera_options.columns_for_sql
        else:
            upload_columns = level_2_optionals_baviera_options.columns_for_sql_temp
        # Delete only this NLR's rows before injecting the refreshed data.
        sql_delete(
            level_2_optionals_baviera_options.DSN_MLG_PRD, db, view,
            level_2_optionals_baviera_options,
            {'NLR_Code': '{}'.format(level_2_optionals_baviera_options.nlr_code)})
        sql_inject(df, level_2_optionals_baviera_options.DSN_MLG_PRD, db, view,
                   level_2_optionals_baviera_options, upload_columns, check_date=1)
    log_record('Fim Secção E.', project_id)
    performance_info_append(time.time(), 'Section_E_End')
    return
def main():
    """APV pipeline entry point: for each PSE group, load/compute the sales data
    and run modelling + deployment per PSE code."""
    log_record('Projeto: {}'.format(project_dict[options_file.project_id]), options_file.project_id)
    for pse_group in options_file.pse_code_groups:
        current_date, _ = time_tags(format_date='%Y%m%d')
        # current_date = '20210218'
        # Considering all PSE Groups were processed in the same day.
        last_processed_date, second_to_last_processed_date, preprocessed_data_exists_flag = apv_last_stock_calculation(options_file.min_date, current_date, pse_group[0], options_file.project_id)
        print('Processing data from {} to {}'.format(last_processed_date, current_date))
        print('Deleting data for the day of {}'.format(second_to_last_processed_date))
        df_sales_group, df_product_group_dw, df_history_group = data_acquisition(options_file, pse_group, last_processed_date, current_date)
        for pse_code in pse_group:
            print('Starting PSE_Code {}'.format(pse_code))
            # log_record('Começou PSE = {}'.format(pse_code), options_file.project_id)
            #
            # df_sales = df_sales_group.loc[df_sales_group['PSE_Code'] == pse_code, :]
            try:
                # NOTE(review): catching FileNotFoundError around a .loc slice is
                # suspicious — likely left over from a read_csv version. If it fires,
                # df_history stays unbound and data_modelling below raises NameError.
                df_history = df_history_group.loc[df_history_group['SO_Code'] == pse_code, :]
            except FileNotFoundError:
                print('df_history not found for pse_code {} and date {}'.format(pse_code, current_date))
            try:
                # Reuse the cleaned sales file produced by a previous run for this day.
                df_sales_cleaned = pd.read_csv('dbs/df_sales_cleaned_' + str(pse_code) + '_' + str(current_date) + '.csv', parse_dates=['Movement_Date'])
                print('File found for pse_code {} and date {}'.format(pse_code, current_date))
            except FileNotFoundError:
                # NOTE(review): only logs; df_sales_cleaned may be unbound (or stale
                # from the previous pse_code) when the file is missing — the
                # commented-out data_processing fallback below presumably handled this.
                print('df_sales not found for pse_code {} and date {}'.format(pse_code, current_date))
            # if not df_sales.shape[0]:
            #     log_record('Sem vendas para o PSE Code = {}'.format(pse_code), options_file.project_id, flag=1)
            #     flag_testing = 1
            #
            # if not df_history.shape[0]:
            #     log_record('Sem histórico para o PSE Code = {}'.format(pse_code), options_file.project_id, flag=1)
            #     flag_testing = 1
            #
            # if flag_testing:
            #     continue
            #
            # df_sales_cleaned = data_processing(df_sales, pse_code, options_file)
            try:
                df_solver, df_part_ref_ta = data_modelling(pse_code, df_sales_cleaned, df_history, last_processed_date, current_date, preprocessed_data_exists_flag, options_file.project_id)
            except ValueError:
                # No parts found for this PSE code — skip it.
                log_record('Não foram encontradas peças para o PSE_Code {}.'.format(pse_code), options_file.project_id, flag=1)
                continue
            deployment(df_solver, df_part_ref_ta, pse_code)
            log_record('Terminou PSE = {}'.format(pse_code), options_file.project_id)
    # performance_info(options_file.project_id, options_file, model_choice_message='N/A')
    # delete_temp_files(options_file.pse_codes_groups, second_to_last_processed_date)
    log_record('Conclusão com sucesso - Projeto: {} .\n'.format(project_dict[options_file.project_id]), options_file.project_id)
def sql_truncate(dsn, options_file, database, view, query=None):
    """Truncate `view` in `database` (or execute a caller-supplied statement)."""
    level_0_performance_report.log_record(
        'A truncar view {} da DB {}.'.format(view, database),
        options_file.project_id)
    cnxn = odbc_connection_creation(dsn, options_file.UID, options_file.PWD, database)
    statement = query if query is not None else "TRUNCATE TABLE " + view
    cursor = cnxn.cursor()
    cursor.execute(statement)
    cnxn.commit()
    cursor.close()
    cnxn.close()
def deployment(df, db, view):
    """Section E: upload the selected vehicle-fact columns into `db`.`view`.

    The eight copy-pasted date-column conversions (datetime NaT -> None so the
    driver writes NULLs) are now a single loop over the column list — behavior
    unchanged, duplication removed.
    """
    performance_info_append(time.time(), 'Section_E_Start')
    log_record('Início Secção E...', options_file.project_id)
    if df is not None:
        sel_df = df.loc[:, options_file.sql_columns_vhe_fact_bi].copy()
        date_columns = [
            'NLR_Posting_Date', 'SLR_Document_Date_CHS', 'SLR_Document_Date_RGN',
            'Ship_Arrival_Date', 'Registration_Request_Date', 'Registration_Date',
            'PDB_Start_Order_Date', 'PDB_End_Order_Date',
        ]
        # astype(object) first so NaT can be replaced by None (kept per column,
        # as in the original, before the frame-wide where below).
        for col in date_columns:
            sel_df[col] = sel_df[col].astype(object).where(sel_df[col].notnull(), None)
        sel_df['Fixed_Margin_II'] = sel_df['Fixed_Margin_II'].round(2)
        # Frame-wide NaN -> None for the remaining columns.
        sel_df = sel_df.where(sel_df.notnull(), None)
        sel_df.rename(columns={'prev_sales_check': 'Previous_Sales_Flag',
                               'number_prev_sales': 'Previous_Sales_Count'},
                      inplace=True)
        sql_inject(sel_df, options_file.DSN_SRV3_PRD, db, view, options_file,
                   list(sel_df), truncate=1, check_date=1)
    log_record('Fim Secção E.', options_file.project_id)
    performance_info_append(time.time(), 'Section_E_End')