コード例 #1
0
ファイル: test_1_utils.py プロジェクト: paularnaud2/ParTools
def get_duration():
    """Check u.get_duration_string against three reference durations.

    Each case calls the helper with a start time of 0 and the listed end
    time, logs the returned string and asserts the exact expected text.
    """
    u.log_print("Test string.get_duration", dashes=100)
    # (end_time, expected string) pairs, checked in this order.
    cases = (
        (0.35, "350 ms"),
        (5.369, "5.3 s"),
        (150, "2 minutes and 30 seconds"),
    )
    for end_time, expected in cases:
        dstr = u.get_duration_string(0, end_time=end_time)
        u.log(dstr)
        assert dstr == expected
    u.log_print()
コード例 #2
0
ファイル: main.py プロジェクト: paularnaud2/ParTools
def left_join_files(lpath='', rpath='', out='', debug=False):
    """Left-join two CSV files on the first column of each file.

    When lpath or rpath is given, the globals are re-initialised and both
    files are loaded with u.load_csv. Otherwise the left array already held
    in gl.ar_in is joined with the array loaded from gl.OUT_SQL.

    The joined array (gl.out_array) is saved to out, or to gl.OUT_PATH when
    out is empty. A truthy debug switches gl.DEBUG_JOIN on.
    """
    from .init import init_globals
    from .join import left_join_arrays

    u.log("[rl] left_join_files: start")
    start_time = time()
    if debug:
        gl.DEBUG_JOIN = True

    if not (lpath or rpath):
        # No explicit paths: only the right-hand side has to be loaded.
        u.log("Loading right arrays...")
        right_array = u.load_csv(gl.OUT_SQL)
        u.log("Right array loaded")
    else:
        init_globals()
        u.log(f"Loading arrays from '{lpath}' and '{rpath}'...")
        gl.ar_in = u.load_csv(lpath)
        right_array = u.load_csv(rpath)
        u.log("Arrays loaded")
        u.log_print('|')

    left_join_arrays(gl.ar_in, right_array)

    # Fall back to the configured output path when none was supplied.
    out = out or gl.OUT_PATH
    u.log("Saving output file...")
    u.save_csv(gl.out_array, out)
    u.log(f"Output file saved in {out}")

    elapsed = u.get_duration_string(start_time)
    u.log(f"[rl] left_join_files: end ({elapsed})")
    u.log_print('|')
コード例 #3
0
def finish(out_path, prompt, nb, start_time):
    """Log the sort summary and persist the duplicates that were found.

    Pure duplicates (gl.dup_list) are saved to a file whose name is built
    from gl.OUT_DUP_FILE, nb (only when nb is not 0) and gl.FILE_TYPE.
    Key duplicates (gl.dup_key_list) are either handed to prompt_dup_key
    (when prompt is truthy) or saved to gl.OUT_DUP_KEY_FILE.
    """
    key_dup_count = len(gl.dup_key_list)
    pure_dup_count = len(gl.dup_list)
    written = u.big_number(gl.c_tot_out)
    removed = u.big_number(pure_dup_count)
    u.log(f"Output file {out_path} successfully generated"
          f" ({written} lines written, {removed} pure duplicates removed).")

    if pure_dup_count > 0:
        # nb is only inserted in the file name when it is non-zero.
        out_dup = (gl.OUT_DUP_FILE + str(nb) + gl.FILE_TYPE
                   if nb != 0 else gl.OUT_DUP_FILE + gl.FILE_TYPE)
        u.save_csv(gl.dup_list, out_dup)
        u.log(f"Pure duplicates list written in {out_dup}")
        u.log_example(gl.dup_list, "pure duplicates")

    if key_dup_count > 0:
        if not prompt:
            u.save_csv(gl.dup_key_list, gl.OUT_DUP_KEY_FILE)
            u.log(f"{key_dup_count} key duplicates found."
                  f" List written in {gl.OUT_DUP_KEY_FILE}")
        else:
            prompt_dup_key(key_dup_count)

    elapsed = u.get_duration_string(start_time)
    u.log(f"[dq] sort_file: end ({elapsed})")
コード例 #4
0
def finish_this(start_time):
    """Release the upload resources and log the closing summary.

    Closes gl.cnx, removes the file at gl.tmp_file_chunk, then logs the
    formatted value of gl.c_main as the number of exported lines, followed
    by the duration string computed from start_time.
    """
    gl.cnx.close()
    os.remove(gl.tmp_file_chunk)

    exported = u.big_number(gl.c_main)
    elapsed = u.get_duration_string(start_time)
    closing_messages = (
        f"{exported} lines exported",
        f"[sql] upload: end ({elapsed})",
    )
    for message in closing_messages:
        u.log(message)
コード例 #5
0
ファイル: finish.py プロジェクト: paularnaud2/ParTools
def finish_xml(out_path, start_time):
    """Log the end of parse_xml and optionally open the output file.

    The log line reports the formatted value of gl.N_WRITE and the duration
    string computed from start_time. out_path is passed to u.startfile only
    when gl.OPEN_OUT_FILE is truthy.
    """
    elapsed = u.get_duration_string(start_time)
    written = u.big_number(gl.N_WRITE)
    u.log(f"[toolParseXML] parse_xml: end ({written} lines written in {elapsed})")
    u.log_print()

    if not gl.OPEN_OUT_FILE:
        return
    u.startfile(out_path)
コード例 #6
0
ファイル: functions.py プロジェクト: paularnaud2/ParTools
def finish_dq(start_time):
    """Log the end of run_dq, optionally notify and open the output file.

    u.get_duration_string is called with True as second argument and returns
    a pair: a numeric duration (presumably milliseconds - confirm against the
    helper) and its display string.
    """
    duration, duration_text = u.get_duration_string(start_time, True)
    end_message = f"[dq] run_dq: end ({duration_text})"
    u.log(end_message)

    if gl.MSG_BOX_END:
        # The numeric duration and gl.MIN_DUR_TRIGGER are both handed to the
        # message box helper.
        st.msg_box(end_message, "dq", duration, gl.MIN_DUR_TRIGGER)

    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(gl.paths["out"])
コード例 #7
0
def finish(out_path, start_time):
    """Report the filter counts, write the output file and log the end.

    Logs the formatted values of gl.n_r (lines read) and gl.n_o (lines to be
    written), saves gl.out_list to out_path, and passes out_path to
    u.startfile when gl.OPEN_OUT_FILE is truthy.
    """
    u.log("Filtering over")
    read_count = u.big_number(gl.n_r)
    kept_count = u.big_number(gl.n_o)
    u.log(f"{read_count} lines read in the input file and"
          f" {kept_count} lines to be written in the output file")

    u.log("Writing output file...")
    u.save_csv(gl.out_list, out_path)
    u.log(f"Output file saved in {out_path}")

    elapsed = u.get_duration_string(start_time)
    u.log(f"[toolFilter] filter: end ({elapsed})")
    u.log_print()

    if not gl.OPEN_OUT_FILE:
        return
    u.startfile(out_path)
コード例 #8
0
ファイル: functions.py プロジェクト: paularnaud2/ParTools
def finish(start_time):
    """Run the optional duplicate check and log the end of reqlist.

    When gl.CHECK_DUP is truthy, to.find_dup is run on gl.OUT_PATH with
    col=1. A message box is shown when gl.MSG_BOX_END is truthy, and
    gl.OUT_PATH is passed to u.startfile when gl.OPEN_OUT_FILE is truthy.
    """
    import partools.utils as u
    import partools.tools as to
    import partools.utils.sTools as st

    if gl.CHECK_DUP:
        u.log("Checking duplicates on the first column of the output file...")
        to.find_dup(gl.OUT_PATH, col=1)
        u.log_print('|')

    duration, duration_text = u.get_duration_string(start_time, True)
    end_message = f"reqlist: end ({duration_text})"
    # The log line carries the "[rl]" prefix; the message box text does not.
    u.log(f"[rl] {end_message}")
    if gl.MSG_BOX_END:
        st.msg_box(end_message, "rl", duration, gl.MIN_DUR_TRIGGER)

    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(gl.OUT_PATH)
コード例 #9
0
ファイル: functions.py プロジェクト: paularnaud2/ParTools
def compare_files(in_1, in_2, out_path):
    """Compare two files and report whether they match.

    A header is generated in out_path from in_1 and gl.COMPARE_FIELD, then
    compare_sorted_files is run on the two inputs. The verdict is read from
    gl.c_diff once the comparison has run.

    Returns True when gl.c_diff is 0, False otherwise.
    """
    from .csf import compare_sorted_files

    u.log("[dq] compare_files: start")
    start_time = time()
    u.gen_header(in_1, gl.COMPARE_FIELD, out_path)
    compare_sorted_files(in_1, in_2, out_path)

    files_match = gl.c_diff == 0
    if files_match:
        u.log("Files match")
    else:
        diff_count = u.big_number(gl.c_diff)
        u.log(f"{diff_count} differences found")

    elapsed = u.get_duration_string(start_time)
    u.log(f"[dq] compare_files: end ({elapsed})")

    return files_match
コード例 #10
0
ファイル: finish.py プロジェクト: paularnaud2/ParTools
def finish_sbf(out_path, start_time):
    """Write the search result window (if any) and log the end of the run.

    When gl.FOUND is truthy, the slice of gl.cur_list running from
    (gl.c_row - 1 - gl.PRINT_SIZE // 2), clamped at 0, up to but excluding
    (gl.c_row - 1 + gl.PRINT_SIZE // 2) is saved to out_path, and out_path
    is passed to u.startfile when gl.OPEN_OUT_FILE is truthy. Otherwise a
    "not found" summary is logged.
    """
    if gl.FOUND:
        half_window = gl.PRINT_SIZE // 2
        centre = gl.c_row - 1
        # Clamp at 0: a negative start would make the slice count from the
        # end of the list instead of starting at its beginning.
        low = max(0, centre - half_window)
        high = centre + half_window
        u.save_list(gl.cur_list[low:high], out_path)
        # The f-string is already fully interpolated. Calling .format() on it
        # would re-parse any '{' or '}' contained in out_path and either
        # raise or alter the message, so it is logged as is.
        u.log(f"Current list written in {out_path}")
        if gl.OPEN_OUT_FILE:
            u.startfile(out_path)
    else:
        bn = u.big_number(gl.c_main)
        s = (f"EOF reached ({bn} lines, {gl.c_list} temporary lists)"
             f", string '{gl.LOOK_FOR}' not found")
        u.log(s)

    dstr = u.get_duration_string(start_time)
    u.log(f"[toolBF] search_big_file: end ({dstr})\n")
コード例 #11
0
def send_chunk_duration(start):
    """Send the duration of one insert to the main process.

    The first insert is deliberately skipped because cache mechanisms may
    make it longer than expected; the duration of the second insert is the
    one that is sent.

    Nothing happens when gl.MD is falsy, when the insert counter
    (gl.c_main // gl.NB_MAX_ELT_INSERT) is not 2, or when gl.MD["T"] already
    holds a truthy value.
    """
    # Short-circuit evaluation keeps the checks in the same order: gl.MD
    # first, then the insert counter, then the already-sent flag.
    should_send = (
        gl.MD
        and gl.c_main // gl.NB_MAX_ELT_INSERT == 2
        and not gl.MD["T"]
    )
    if not should_send:
        return

    duration, duration_text = u.get_duration_string(start, True)
    u.log(f"Sending duration to the main process ({duration_text})...")
    # A duration of 0 is stored as 1, so the stored value is never falsy.
    gl.MD["T"] = 1 if duration == 0 else duration
コード例 #12
0
ファイル: execute.py プロジェクト: paularnaud2/ParTools
def execute(**kwargs):
    """Executes a SQL script or a PL/SQL procedure on an Oracle DB

    See README.md for guidance

    See partools/quickstart/sql_execute.py for examples of use

    When gl.PROC is truthy the whole script is executed as one statement.
    Otherwise the script is split on ';\\n' and each command is executed in
    turn. The changes are committed only if every statement succeeds.

    The cursor and the connection are always closed, even when a statement
    raises; in that case this function does not commit and the exception
    propagates to the caller.
    """

    u.log('[sql] execute: start')
    start_time = time()
    u.init_kwargs(gl, kwargs)
    init_gl()
    script = get_final_script(gl.SCRIPT_IN)
    cnx = connect()
    try:
        c = cnx.cursor()
        try:
            if gl.PROC:
                u.log("Executing proc:")
                u.log_print(script)
                c.execute(script)
                u.log("Proc executed")
            else:
                command_list = script.split(';\n')
                # str.split always returns at least one element, so the last
                # item exists: it is either an empty remainder (script ended
                # with ';\n') or a final command that may still carry ';'.
                if command_list[-1]:
                    command_list[-1] = command_list[-1].strip(';')
                else:
                    command_list.pop()
                for command in command_list:
                    u.log("Executing command:")
                    u.log_print(command)
                    c.execute(command)
                    u.log("Command executed")
        finally:
            # Release the cursor whether or not a statement failed.
            c.close()
        cnx.commit()
    finally:
        # Release the connection on both the success and the failure path.
        cnx.close()

    dstr = u.get_duration_string(start_time)
    u.log(f"[sql] execute: end ({dstr})")
    u.log_print()