def test_dq():
    """Integration test for the dq (data-quality) module.

    Runs dq comparisons on several input-file pairs and checks the produced
    outputs against reference files, then verifies the run log via
    u.check_log. Statement order matters: each step depends on files
    produced by earlier steps.
    """
    u.init_log('test_dq', True)
    u.mkdirs(gl.OUT_DIR, True)
    u.log_print()
    # Inputs with a missing (E_MH) or different (E_DH) header must raise
    u.log_print("Test dq no header", dashes=100)
    ttry(td.dq_t, g.E_MH, gl.IN_MH, gl.IN12, gl.OUT1)
    ttry(td.dq_t, g.E_DH, gl.IN11, gl.IN_DH, gl.OUT1)
    u.log_print("Test dup key", dashes=100)
    td.dq_t(gl.IN_DK, gl.IN12, gl.OUT1, tpd=True)
    u.log_print("Test different files comparison", dashes=100)
    # First comparison writes its diff to OUT_FM (err=False: mismatch is not fatal),
    # second comparison checks that diff against the reference
    dq.file_match(gl.REF1_F, gl.REF2_F, err=False, out_path=gl.OUT_FM)
    dq.file_match(gl.OUT_FM, gl.REF_FDM)
    u.log_print("Test dq No. 1", dashes=100)
    td.dq_t(gl.IN11, gl.IN12, gl.OUT1, gl.REF1, 100, gl.REF_DUP1, sl=10)
    td.dq_t(gl.IN11, gl.IN12, gl.OUT1, gl.REF1, 15, gl.REF_DUP1)
    td.dq_t(gl.IN11, gl.IN12, gl.OUT1, gl.REF1_E, eq=True)
    u.log_print("Test dq No. 2", dashes=100)
    td.dq_t(gl.IN21, gl.IN22, gl.OUT2, gl.REF2, 100, gl.REF_DUP2, 2)
    td.dq_t(gl.IN21, gl.IN22, gl.OUT2, gl.REF2, 15, gl.REF_DUP2, 2)
    td.dq_t(gl.IN21, gl.IN22, gl.OUT2, gl.REF2_E, eq=True)
    u.log_print("Test dq No. 3", dashes=100)
    td.dq_t(gl.IN31, gl.IN32, gl.OUT3, gl.REF3, 15)
    td.dq_t(gl.IN31, gl.IN32, gl.OUT3, gl.REF3_E, eq=True)
    td.dq_t(gl.IN31, gl.IN32, gl.OUT3, gl.REF3, 100, tps=True, mls=6)
    # NOTE(review): every other comparison in this function uses
    # dq.file_match; confirm td.file_match is intended here.
    td.file_match(gl.REF_SPLIT_3, gl.OUT_SPLIT_3)
    u.check_log(td.CL)
def test_sql():
    """Integration test for the sql module (connect, iutd, upload, download).

    Skips entirely when no test database is configured. Uploads a test
    file, downloads it back through several query variants (standard, RG
    with/without merge, counts) and compares each result to reference
    files, then cleans the test tables and checks the run log.
    """
    u.init_log("test_sql", True)
    # No test DB configured -> nothing to do
    if not ts.is_test_db_defined():
        return
    u.log_print("Test connect", dashes=100)
    ts.connect()
    u.log_print("Test iutd", dashes=100)
    ts.reset()
    ts.iutd()
    u.log_print("Test upload - missing header in input file", dashes=100)
    # Upload of a header-less file must raise E_MH
    t.ttry(ts.upload, u.g.E_MH, gl.IN_MH)
    u.log_print("Test upload - interuption and recovery", dashes=100)
    # Simulated interruption, then recovery with tr=True
    ts.upload_interrupted()
    ts.upload(gl.IN, tr=True)
    u.log_print("Test download - no output", dashes=100)
    ts.download(gl.QUERY_NO, gl.DL_OUT, ti=True)
    u.log_print("Test download standard", dashes=100)
    ts.reset()
    ts.download(gl.QUERY, gl.DL_OUT)
    # Round trip: downloaded data must match what was uploaded
    dq.file_match(gl.IN, gl.DL_OUT)
    dq.file_match(t.gl.OUT_DUP_TMP, gl.OUT_DUP_REF)
    s = "Test download RG with merge - interuption and recovery"
    u.log_print(s, dashes=100)
    ts.download_interrupted(gl.QUERY_RG, gl.DL_OUT_RG)
    ts.download(gl.QUERY_RG, gl.DL_OUT_RG, tr=True, sl=50)
    dq.file_match(gl.DL_OUT, gl.DL_OUT_RG)
    u.log_print("Test download RG without merge", dashes=100)
    ts.reset()
    ts.download(gl.QUERY_RG, gl.DL_OUT_RG, merge=False, cnx=1, sl=50)
    dq.file_match(gl.RG_REF, gl.RG_COMP)
    u.log_print("Test download - count simple", dashes=100)
    ts.reset()
    # Plain count query and its RG variant must produce the same reference output
    ts.download(gl.QUERY_COUNT_1, gl.DL_OUT_COUNT)
    dq.file_match(gl.DL_OUT_COUNT, gl.DL_OUT_COUNT_1_REF)
    ts.download(gl.QUERY_COUNT_1_RG, gl.DL_OUT_COUNT)
    dq.file_match(gl.DL_OUT_COUNT, gl.DL_OUT_COUNT_1_REF)
    u.log_print("Test download - count group by", dashes=100)
    ts.reset()
    ts.download(gl.QUERY_COUNT_2, gl.DL_OUT_COUNT)
    dq.file_match(gl.DL_OUT_COUNT, gl.DL_OUT_COUNT_2_REF)
    ts.download(gl.QUERY_COUNT_2_RG, gl.DL_OUT_COUNT)
    dq.file_match(gl.DL_OUT_COUNT, gl.DL_OUT_COUNT_2_REF)
    # Drop the tables created during the test, then validate the log
    ts.clean_db([gl.T_TEST, gl.T_IUTD])
    u.check_log(ts.CL)
def test_rl():
    """Integration test for the rl (reqlist) module.

    Skips when no test database is configured. Tests left joins on file
    pairs, then reqlist error paths (no sql output, missing query variable,
    missing header), the standard chained run, and interruption/recovery,
    comparing outputs against reference files before cleaning up.
    """
    u.init_log('test_rl', True)
    # No test DB configured -> nothing to do
    if not ts.is_test_db_defined():
        return
    u.mkdirs(gl.TMP_DIR, True)
    u.mkdirs(ts.gl.TMP_DIR, True)
    u.mkdirs(gl.OUT_DIR, True)
    u.log_print()
    u.log_print('Test join', dashes=100)
    tr.left_join_files(gl.LEFT_1, gl.RIGHT_1, gl.OUT_JOIN_REF_1)
    tr.left_join_files(gl.LEFT_2, gl.RIGHT_2, gl.OUT_JOIN_REF_2)
    tr.left_join_files(gl.LEFT_3, gl.RIGHT_3, gl.OUT_JOIN_REF_3)
    u.log_print('Preparing DB', dashes=100)
    ts.upload(ts.gl.IN)
    # Build the reqlist input file from the first column of the uploaded CSV
    arr = u.load_csv(ts.gl.IN)
    arr = [elt[0] for elt in arr]
    u.save_csv(arr, gl.IN_1)
    u.log_print('Test rl - no sql output', dashes=100)
    t.ttry(tr.reqlist, u.g.E_VA, gl.IN_1, gl.OUT_1, gl.QUERY_NO)
    u.log_print('Test rl - no var in query', dashes=100)
    t.ttry(tr.reqlist, u.g.E_MV, gl.IN_1, gl.OUT_1, gl.QUERY_MV)
    u.log_print('Test rl - missing header', dashes=100)
    # arr[1:] drops the header row to trigger the E_MH error
    u.save_csv(arr[1:], gl.IN_MH)
    t.ttry(tr.reqlist, u.g.E_MH, gl.IN_MH, gl.OUT_1, gl.QUERY_1)
    u.log_print('Test rl - standard', dashes=100)
    # Two chained runs: OUT_1 feeds the second reqlist call
    tr.reqlist(gl.IN_1, gl.OUT_1, gl.QUERY_1, cnx=1)
    tr.reqlist(gl.OUT_1, gl.OUT_2, gl.QUERY_2)
    dq.file_match(ts.gl.IN, gl.OUT_2, del_dup=True)
    dq.file_match(t.gl.OUT_DUP_TMP, gl.OUT_DUP_REF)
    u.log_print('Test rl - interuption and recovery', dashes=100)
    u.mkdirs(gl.TMP_DIR, True)
    u.log_print()
    args = [gl.OUT_1, gl.OUT_3, gl.QUERY_2]
    tr.reqlist_interrupted(*args, cnx=6)
    # Recovery run (True) must yield the same output as the uninterrupted run
    tr.reqlist(gl.OUT_1, gl.OUT_3, gl.QUERY_2, True, cnx=6)
    dq.file_match(gl.OUT_2, gl.OUT_3)
    ts.clean_db([ts.gl.T_TEST])
    u.check_log(tr.CL)
def test_tools():
    """Integration test for the tools helpers (xml, split, dup, filter, BF).

    Each sub-test produces an output file that is compared against a
    reference file with dq.file_match, then the run log is validated.
    """
    u.init_log('test_tools', True)
    u.mkdirs(gl.OUT_DIR, True)
    u.log_print()
    u.log_print("Test tools.xml", dashes=100)
    tt.parse_xml()
    dq.file_match(gl.XML_OUT, gl.XML_OUT_REF)
    u.log_print("Test toolSplit", dashes=100)
    tt.split()
    u.log_print("Test toolDup - to.find_dup simple", dashes=100)
    to.find_dup(gl.DUP_IN, gl.DUP_OUT)
    u.log_print()
    dq.file_match(gl.DUP_OUT, gl.DUP_OUT_REF)
    u.log_print("Test toolDup - to.find_dup col", dashes=100)
    # col=1: duplicate detection keyed on the second column
    # NOTE(review): no output path is passed here but gl.DUP_OUT is
    # compared below — presumably find_dup writes to a default output
    # equal to gl.DUP_OUT; confirm in the to module.
    to.find_dup(gl.DUP_COL_IN, col=1)
    u.log_print()
    dq.file_match(gl.DUP_OUT, gl.DUP_OUT_REF)
    u.log_print("Test toolDup - to.del_dup + shuffle", dashes=100)
    # Shuffle first so del_dup is exercised on unordered input
    to.shuffle_file(gl.DUP_IN, gl.SHUF_OUT)
    u.log_print()
    to.del_dup(gl.SHUF_OUT, gl.DUP_OUT)
    u.log_print()
    dq.file_match(gl.DUP_OUT, gl.DEL_DUP_OUT_REF)
    u.log_print("Test toolDup - to.find_dup_list", dashes=100)
    # In-memory variant: load, find duplicates in the list, save, compare
    list_in = u.load_csv(gl.DUP_IN)
    dup_list = to.find_dup_list(list_in)
    u.save_csv(dup_list, gl.DUP_OUT)
    dq.file_match(gl.DUP_OUT, gl.DUP_OUT_REF)
    u.log_print("Test toolFilter", dashes=100)
    tt.flt()
    u.log_print("Test BF", dashes=100)
    tt.read_big_file()
    tt.search_big_file()
    bf.sort_big_file(ts.gl.IN, gl.SORT_BF_OUT)
    dq.file_match(ts.gl.IN, gl.SORT_BF_OUT, del_dup=True)
    u.check_log(tt.CL)
insert commands. If your script contains a single command or is a PL/SQL procedure (first example), you have to put PROC=True or nothing (default value); if it contains several commands (second example), you have to set PROC=False. Notes: - SCRIPT_IN accepts either a string or a file path - CNX_INFO and DB inputs follow the same rules as for sql.download For more details, check out the README.md file. """ import partools.sql as sql from partools.utils import init_log from partools.quickstart import files_dir init_log('sql_execute') db = 'XE' cnx_str = 'USERNAME/PWD@localhost:1521/XE' script_in = f'{files_dir}create_table.sql' sql.execute( DB=db, SCRIPT_IN=script_in, VAR_DICT={'TABLE_NAME': 'TEST'}, ) script_in = """ INSERT INTO TEST VALUES (1, 1, 1); INSERT INTO TEST VALUES (2, 2, 2); """
The input 'VAR_DICT' allows you to pass a dictionary containing variable names and values to be replaced in the input script. Here, '@@TABLE_NAME@@' will be replaced by 'TEST'. Notes: - SCRIPT_IN accepts either a string or a file path - CNX_INFO and DB inputs follow the same rules as for sql.download For more details, check out the README.md file. """ import partools.sql as sql from partools.utils import init_log from partools.quickstart import files_dir init_log('sql_upload') db = 'XE' cnx_str = 'USERNAME/PWD@localhost:1521/XE' script_in = f'{files_dir}insert_table.sql' execute_kwargs = { "SCRIPT_IN": f'{files_dir}create_table.sql', "PROC": True, } sql.upload( # CNX_INFO=cnx_str, # CNX_INFO=cnx_tns, DB=db, UPLOAD_IN=f'{files_dir}in.csv',
# This script shows you simple examples of use for the log and step_log functions import time import partools.utils as u u.log("This won't be logged in a file") u.init_log('test') u.log("This will be logged in a file") out_list = [] u.init_sl_time() for i in range(1, 21): time.sleep(0.05) # simulates io / calculation out_list.append(i) u.step_log(i, 5, "elements appended") u.log_print(f'out_list: {out_list}')
in the conf file (partools/conf.py, CONF_ORACLE) - As you'll see below, CNX_INFO can either be a connection string: 'USER/PWD@HOST:PORT/SERVICE_NAME' or a list: ['USERNAME', 'PWD', 'TNS_NAME'] or ['USERNAME', 'PWD', 'DSN'] For more details, check out the README.md file. """ from datetime import datetime import partools.sql as sql from partools.utils import g from partools.utils import init_log init_log('sql_download') db = 'XE' cnx_str = 'USERNAME/PWD@localhost:1521/XE' # Here 'XE_TNS' is a TNS_NAME that has to be defined in the tnsnames.ora file. # You can also directly put a DSN instead. cnx_tns = ['USERNAME', 'PWD', 'XE_TNS'] date = datetime.now().strftime("%Y%m%d") out_file = f"{g.dirs['OUT']}sql_{db}_{date}.csv" def example_simple(): query_in = "SELECT 'HELLO WORLD' as TEST FROM DUAL" sql.download(
In order for the run_dq function to work correctly, the input files must both have a pivot column (ie. containing keys/IDs) and be free of 'key duplicates' meaning having different lines with the same ID. The index of the pivot column is an input parameter (gl.PIVOT_IDX = 0) In this script, two files used for testing purposes are compared. Result interpretation: COMPARE_RES = in_11: the key is present in the file in_11 and not in the file in_12 COMPARE_RES = in_12: the key is present in the file in_12 and not in the file in_11 COMPARE_RES = in_11|in_12: the key is present in both files but lines differ. In the last case, the differences are outlined by writing the first and second files' values separated by '|' for each field that differs. For example 'O|N' in the field 'ETAT' means that the first file (in_11) has a 'O' and the second file (in_12) has a 'N'. For more details, check out the README.md file. """ import partools.dq as dq from partools.utils import init_log from partools.quickstart import files_dir init_log('dq') dq.run_dq( IN_DIR=files_dir, IN_FILE_NAME_1="in_11", IN_FILE_NAME_2="in_12", )
- Before running this example, you run quickstart/sql_upload.py to create and populate the TEST table. - CNX_INFO and DB inputs follow the same rules as for sql.download For more details, check out the README.md file. """ from datetime import datetime import partools.utils as u from partools.utils import g from partools.utils import init_log from partools.rl import reqlist from partools.quickstart import files_dir init_log('rl') db = 'XE' cnx_str = 'USERNAME/PWD@localhost:1521/XE' date = datetime.now().strftime("%Y%m%d") in_file = f"{g.dirs['IN']}rl_in.csv" out_file = f"{g.dirs['OUT']}export_RL_{db}_{date}.csv" # Creates input file from test file arr = u.load_csv(f'{files_dir}in.csv') arr = [elt[0:2] for elt in arr] u.save_csv(arr, in_file) # The input query has to be variabilized ie. contain @@IN@@: query_in = """
def test_mail():
    """Integration test for the mail module (gmail, no_auth, outlook).

    Backs up the mails directory, exercises error paths (recipients not
    configured, confidential file missing), then sends each mail variant
    in TEST mode and asserts the result with ast(), before restoring the
    backup and validating the run log.
    """
    u.init_log('test_mail', True)
    u.delete_folder('mail_back')
    # Back up the mails dir so the test can mutate it freely
    # NOTE(review): nesting reconstructed from a collapsed source line —
    # the delete is assumed to belong to the if body (backup-then-clear);
    # confirm against the original file.
    if os.path.exists(cfg.MAILS_DIR):
        copytree(cfg.MAILS_DIR, 'mail_back')
        u.delete_folder(cfg.MAILS_DIR)
    u.log_print("Test gmail - KO, recipients not configureed", dashes=100)
    args = [gl.MAIL_NAME, gl.S_VDHT, gl.VD, gl.ATT]
    ttry(mail.gmail, gl.E_NOT_CONFIGURED, *args)
    u.log_print()
    u.log_print("Test gmail - KO, confidential file not found", dashes=100)
    recipients_path = cfg.MAILS_DIR + gl.MAIL_NAME + '/' + mail.gl.RECIPIENTS
    u.save_list(gl.RECIPIENTS_FILE, recipients_path)
    # Hide the confidential file so gmail raises E_CFI, restore it afterwards
    # NOTE(review): the restoring rename below is unconditional — it will
    # fail if CFI_PATH did not exist; confirm intended guard in original.
    if os.path.exists(cfg.CFI_PATH):
        os.rename(cfg.CFI_PATH, cfg.CFI_PATH + '_')
    args = [gl.MAIL_NAME, gl.S_VDHT, gl.VD, gl.ATT]
    ttry(mail.gmail, gl.E_CFI, *args)
    os.rename(cfg.CFI_PATH + '_', cfg.CFI_PATH)
    u.log_print()
    # TEST mode: mails are not actually sent from here on
    mail.gl.TEST = True
    u.log_print(f"Test gmail - {gl.S_VDHT}", dashes=100)
    args = [gl.MAIL_NAME, gl.S_VDHT, gl.VD, gl.ATT]
    mail.gmail(*args)
    ast(gl.NVAR, gl.HT)
    u.log_print()
    u.log_print(f"Test gmail - {gl.S_VDPT}", dashes=100)
    args = [gl.MAIL_NAME, gl.S_VDPT, gl.VD, [], tm.BODY, gl.RECIPIENTS_IN, KEY]
    mail.gmail(*args)
    ast(gl.NVAR, gl.PT)
    u.log_print()
    u.log_print(f"Test gmail - {gl.S_HT}", dashes=100)
    args = [gl.MAIL_NAME, gl.S_HT]
    mail.gmail(*args)
    ast(gl.VAR, gl.HT)
    u.log_print()
    u.log_print(f"Test gmail - {gl.S_PT}", dashes=100)
    args = [gl.MAIL_NAME, gl.S_PT, [], [], tm.BODY]
    mail.gmail(*args)
    ast(gl.VAR, gl.PT)
    u.log_print()
    u.log_print("Test no_auth", dashes=100)
    args = [gl.MAIL_NAME, gl.S_VDHT, gl.VD, gl.ATT]
    ttry(mail.no_auth, gl.E_NO_AUT, *args)
    ast(gl.NVAR, gl.HT)
    u.log_print()
    u.log_print("Test outlook", dashes=100)
    args = [gl.MAIL_NAME, gl.S_VDPT, gl.VD, [], tm.BODY]
    ttry(mail.outlook, gl.E_OUTLOOK, *args)
    ast(gl.NVAR, gl.PT)
    u.log_print()
    # Restoring mail backup
    u.delete_folder(cfg.MAILS_DIR)
    copytree('mail_back', cfg.MAILS_DIR)
    u.delete_folder('mail_back')
    u.check_log(tm.CL)