def get_duration():
    u.log_print("Test string.get_duration", dashes=100)

    # Sub-second durations are formatted in milliseconds
    dstr = u.get_duration_string(0, end_time=0.35)
    u.log(dstr)
    assert dstr == "350 ms"

    # Durations under a minute are formatted in seconds with one decimal
    dstr = u.get_duration_string(0, end_time=5.369)
    u.log(dstr)
    assert dstr == "5.3 s"

    # Longer durations are spelled out in minutes and seconds
    dstr = u.get_duration_string(0, end_time=150)
    u.log(dstr)
    assert dstr == "2 minutes and 30 seconds"

    u.log_print()
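# The asserts above pin down the expected output formats. Below is a minimal
# sketch of a formatter that satisfies them; it is only an assumption about
# how u.get_duration_string behaves, not its actual implementation.
from time import time

def duration_string_sketch(start_time, end_time=None):
    """Hypothetical stand-in for u.get_duration_string (illustration only)."""
    if end_time is None:
        end_time = time()
    elapsed = end_time - start_time
    if elapsed < 1:
        return f"{round(elapsed * 1000)} ms"           # 0.35  -> "350 ms"
    if elapsed < 60:
        return f"{int(elapsed * 10) / 10} s"           # 5.369 -> "5.3 s" (truncated)
    minutes, seconds = divmod(round(elapsed), 60)
    return f"{minutes} minutes and {seconds} seconds"  # 150   -> "2 minutes and 30 seconds"
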
def left_join_files(lpath='', rpath='', out='', debug=False):
    """Joins two files (lpath and rpath) on the first column of each file"""
    from .init import init_globals
    from .join import left_join_arrays

    u.log("[rl] left_join_files: start")
    start_time = time()
    if debug:
        gl.DEBUG_JOIN = True

    if lpath or rpath:
        # Standalone call: load both arrays from the paths given
        init_globals()
        u.log(f"Loading arrays from '{lpath}' and '{rpath}'...")
        gl.ar_in = u.load_csv(lpath)
        ar_right = u.load_csv(rpath)
        u.log("Arrays loaded")
        u.log_print('|')
    else:
        # No paths given: the left array is already in gl.ar_in,
        # so only the right array (the SQL output) needs loading
        u.log("Loading right array...")
        ar_right = u.load_csv(gl.OUT_SQL)
        u.log("Right array loaded")

    left_join_arrays(gl.ar_in, ar_right)

    if not out:
        out = gl.OUT_PATH
    u.log("Saving output file...")
    u.save_csv(gl.out_array, out)
    u.log(f"Output file saved in {out}")

    dstr = u.get_duration_string(start_time)
    u.log(f"[rl] left_join_files: end ({dstr})")
    u.log_print('|')
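# Hypothetical usage (the import path and file names are assumptions made for
# illustration): both files are joined on their first column and the result is
# written to 'out', which defaults to gl.OUT_PATH when omitted.
from partools.reqlist import left_join_files

left_join_files(
    lpath='in/left.csv',
    rpath='in/right.csv',
    out='out/joined.csv',
)
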
def finish(out_path, prompt, nb, start_time):
    n_dup_key = len(gl.dup_key_list)
    n_dup = len(gl.dup_list)
    bn1 = u.big_number(gl.c_tot_out)
    bn2 = u.big_number(n_dup)
    s = (f"Output file {out_path} successfully generated"
         f" ({bn1} lines written, {bn2} pure duplicates removed).")
    u.log(s)

    if n_dup > 0:
        # Suffix the duplicates file with the chunk number when one is given
        if nb != 0:
            out_dup = gl.OUT_DUP_FILE + str(nb) + gl.FILE_TYPE
        else:
            out_dup = gl.OUT_DUP_FILE + gl.FILE_TYPE
        u.save_csv(gl.dup_list, out_dup)
        u.log(f"Pure duplicates list written in {out_dup}")
        u.log_example(gl.dup_list, "pure duplicates")

    if n_dup_key > 0:
        if prompt:
            prompt_dup_key(n_dup_key)
        else:
            u.save_csv(gl.dup_key_list, gl.OUT_DUP_KEY_FILE)
            s = f"{n_dup_key} key duplicates found. List written in {gl.OUT_DUP_KEY_FILE}"
            u.log(s)

    dstr = u.get_duration_string(start_time)
    u.log(f"[dq] sort_file: end ({dstr})")
def finish_this(start_time):
    gl.cnx.close()
    os.remove(gl.tmp_file_chunk)
    bn = u.big_number(gl.c_main)
    dstr = u.get_duration_string(start_time)
    u.log(f"{bn} lines exported")
    u.log(f"[sql] upload: end ({dstr})")
def finish_xml(out_path, start_time):
    dstr = u.get_duration_string(start_time)
    bn = u.big_number(gl.N_WRITE)
    s = f"[toolParseXML] parse_xml: end ({bn} lines written in {dstr})"
    u.log(s)
    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(out_path)
def finish_dq(start_time):
    (dms, dstr) = u.get_duration_string(start_time, True)
    s = f"[dq] run_dq: end ({dstr})"
    u.log(s)
    if gl.MSG_BOX_END:
        st.msg_box(s, "dq", dms, gl.MIN_DUR_TRIGGER)
    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(gl.paths["out"])
def finish(out_path, start_time):
    u.log("Filtering over")
    bn1 = u.big_number(gl.n_r)
    bn2 = u.big_number(gl.n_o)
    s = (f"{bn1} lines read in the input file and"
         f" {bn2} lines to be written in the output file")
    u.log(s)

    u.log("Writing output file...")
    u.save_csv(gl.out_list, out_path)
    u.log(f"Output file saved in {out_path}")

    dstr = u.get_duration_string(start_time)
    u.log(f"[toolFilter] filter: end ({dstr})")
    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(out_path)
def finish(start_time):
    import partools.utils as u
    import partools.tools as to
    import partools.utils.sTools as st

    if gl.CHECK_DUP:
        u.log("Checking duplicates on the first column of the output file...")
        to.find_dup(gl.OUT_PATH, col=1)
        u.log_print('|')

    (dms, dstr) = u.get_duration_string(start_time, True)
    s = f"reqlist: end ({dstr})"
    u.log("[rl] " + s)
    if gl.MSG_BOX_END:
        st.msg_box(s, "rl", dms, gl.MIN_DUR_TRIGGER)
    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(gl.OUT_PATH)
def compare_files(in_1, in_2, out_path):
    from .csf import compare_sorted_files

    u.log("[dq] compare_files: start")
    start_time = time()
    u.gen_header(in_1, gl.COMPARE_FIELD, out_path)
    compare_sorted_files(in_1, in_2, out_path)

    if gl.c_diff == 0:
        u.log("Files match")
        out = True
    else:
        bn = u.big_number(gl.c_diff)
        u.log(f"{bn} differences found")
        out = False

    dstr = u.get_duration_string(start_time)
    u.log(f"[dq] compare_files: end ({dstr})")
    return out
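# Hypothetical usage (file names are made up; both inputs are expected to be
# already sorted, as the call to compare_sorted_files implies). The return
# value tells the caller whether the files matched.
match = compare_files('out/run_a.csv', 'out/run_b.csv', 'out/diff.csv')
if not match:
    print("Differences written to out/diff.csv")
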
def finish_sbf(out_path, start_time):
    if gl.FOUND:
        # Keep a window of PRINT_SIZE lines centred on the matching row
        lowI = gl.c_row - 1 - gl.PRINT_SIZE // 2
        if lowI < 0:
            lowI = 0
        highI = gl.c_row - 1 + gl.PRINT_SIZE // 2
        u.save_list(gl.cur_list[lowI:highI], out_path)
        u.log(f"Current list written in {out_path}")
        if gl.OPEN_OUT_FILE:
            u.startfile(out_path)
    else:
        bn = u.big_number(gl.c_main)
        s = (f"EOF reached ({bn} lines, {gl.c_list} temporary lists)"
             f", string '{gl.LOOK_FOR}' not found")
        u.log(s)

    dstr = u.get_duration_string(start_time)
    u.log(f"[toolBF] search_big_file: end ({dstr})\n")
def send_chunk_duration(start):
    """Sends the duration of one insert to the main process.

    The duration of the first insert is not sent because it might be longer
    than expected due to cache mechanisms, so the duration of the second
    insert is sent instead.
    """
    if not gl.MD:
        return

    # Only the duration of the second insert is sent
    if gl.c_main // gl.NB_MAX_ELT_INSERT != 2:
        return

    if not gl.MD["T"]:
        (dms, dstr) = u.get_duration_string(start, True)
        u.log(f"Sending duration to the main process ({dstr})...")
        if dms == 0:
            dms = 1  # 0 would read as "not set yet", so send at least 1
        gl.MD["T"] = dms
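# The function above assumes gl.MD is a dict-like object shared with the main
# process, for example a multiprocessing.Manager().dict(). A minimal sketch of
# that setup (names are made up for illustration):
from multiprocessing import Manager

manager = Manager()
shared_md = manager.dict({"T": 0})  # 0 means "no duration received yet"
# Each worker would receive this dict as gl.MD; once a worker stores the
# duration of its second insert, the main process can read shared_md["T"].
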
def execute(**kwargs):
    """Executes a SQL script or a PL/SQL procedure on an Oracle DB

    See README.md for guidance
    See partools/quickstart/sql_execute.py for examples of use
    """
    u.log('[sql] execute: start')
    start_time = time()
    u.init_kwargs(gl, kwargs)
    init_gl()
    script = get_final_script(gl.SCRIPT_IN)
    cnx = connect()
    c = cnx.cursor()

    if gl.PROC:
        # A PL/SQL procedure is executed as a single block
        u.log("Executing proc:")
        u.log_print(script)
        c.execute(script)
        u.log("Proc executed")
    else:
        # A SQL script is split into individual commands on ';\n'
        command_list = script.split(';\n')
        n = len(command_list)
        if command_list[n - 1]:
            # Drop a trailing semicolon from the last command
            command_list[n - 1] = command_list[n - 1].strip(';')
        else:
            # The script ends with ';\n', so the last element is empty
            command_list = command_list[:-1]
        for command in command_list:
            u.log("Executing command:")
            u.log_print(command)
            c.execute(command)
            u.log("Command executed")

    c.close()
    cnx.commit()
    cnx.close()

    dstr = u.get_duration_string(start_time)
    u.log(f"[sql] execute: end ({dstr})")
    u.log_print()
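# Hypothetical usage, assuming u.init_kwargs copies keyword arguments onto the
# gl module (so SCRIPT_IN and PROC can be passed directly); the file names
# below are made up. See partools/quickstart/sql_execute.py for the project's
# own examples.
execute(SCRIPT_IN='sql/create_tables.sql')            # multi-statement SQL script
execute(SCRIPT_IN='sql/refresh_data.sql', PROC=True)  # PL/SQL procedure, run as one block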