Esempio n. 1
0
def file_match(in1, in2, del_dup=False, err=True, out_path=''):
    """Compares two files and outputs the diff if the files don't match.
    Note that the files are sorted before comparison.
    (more generic than run_dq but doesn't work for big files)

    - in1, in2: the two inputs to compare (each is loaded with u.load_txt)
    - del_dup: if true, duplicates are deleted before comparison
    - err: if True, an AssertionError is raised when the files don't match
      (the comparison output is opened with u.startfile first)
    - out_path: specifies an output path for file comparison different from default
    """

    u.log("[dq] file_match: start")

    if not out_path:
        out_path = u.g.dirs['OUT'] + 'file_match_out.csv'

    u.log(f"Comparing files '{in1}' and '{in2}'...")
    l1, l2 = u.load_txt(in1), u.load_txt(in2)
    l1.sort()
    l2.sort()
    if del_dup:
        l1, l2 = del_dup_list(l1), del_dup_list(l2)

    res = l1 == l2
    u.log("Files match" if res else "Files don't match")

    if not res:
        f.diff_list(l1, l2, out_path)
        if err:
            u.startfile(out_path)
            # Raised explicitly instead of using `assert`: assert statements
            # are removed when Python runs with -O, which would silently
            # skip the failure promised by the 'err' flag.
            raise AssertionError(
                f"Files '{in1}' and '{in2}' don't match (see '{out_path}')"
            )

    u.log("[dq] file_match: end")
    u.log_print()
Esempio n. 2
0
def finish_del_dup(out_list, out_path, open_out):
    """Saves 'out_list' in 'out_path', logs its size and optionally opens it.

    - out_list: list to save (with u.save_list)
    - out_path: path of the output file
    - open_out: if truthy, the output file is opened with u.startfile
    """

    u.log(f"Saving list without duplicates in '{out_path}'...")
    u.save_list(out_list, out_path)

    n_lines = u.big_number(len(out_list))
    u.log(f"List saved, it has {n_lines} lines")

    if not open_out:
        return
    u.startfile(out_path)
Esempio n. 3
0
def finish_xml(out_path, start_time):
    """Logs the end of parse_xml and optionally opens the output file.

    - out_path: file opened with u.startfile when gl.OPEN_OUT_FILE is set
    - start_time: passed to u.get_duration_string to build the duration text
    """

    duration = u.get_duration_string(start_time)
    n_written = u.big_number(gl.N_WRITE)
    u.log(
        f"[toolParseXML] parse_xml: end ({n_written} lines written in {duration})"
    )
    u.log_print()

    if not gl.OPEN_OUT_FILE:
        return
    u.startfile(out_path)
Esempio n. 4
0
def finish_dq(start_time):
    """Logs the end of run_dq, optionally shows a message box and opens the output.

    - start_time: passed to u.get_duration_string; the first returned element
      is forwarded to st.msg_box together with gl.MIN_DUR_TRIGGER, the second
      one is the duration text used in the log
    """

    elapsed, elapsed_str = u.get_duration_string(start_time, True)
    end_msg = f"[dq] run_dq: end ({elapsed_str})"
    u.log(end_msg)

    if gl.MSG_BOX_END:
        st.msg_box(end_msg, "dq", elapsed, gl.MIN_DUR_TRIGGER)

    u.log_print()

    if not gl.OPEN_OUT_FILE:
        return
    u.startfile(gl.paths["out"])
Esempio n. 5
0
def extract_doc(in_dirs, out_path):
    """Extracts all Python doc ('#' comments included) present in the
    'in_dirs' directories (can be useful for spell check).

    The result is saved in 'out_path' and the file is then opened.
    """

    collected = []
    # extract_doc_from_dir receives the shared list for every directory
    for cur_dir in in_dirs:
        extract_doc_from_dir(cur_dir, collected)

    u.save_list(collected, out_path)
    u.startfile(out_path)
Esempio n. 6
0
def shuffle_file(in_path, out_path, open_out=False):
    """Shuffles the line order of a file using the native random package

    When u.has_header detects a header, the first line is kept at the top
    and only the remaining lines are shuffled.

    - in_path: input file (loaded with u.load_txt)
    - out_path: path where the shuffled list is saved
    - open_out: if True, the output file is opened with u.startfile
    """

    u.log("[toolShuf] shuffle_file: start")
    cur_list = u.load_txt(in_path)

    # 'header' stays empty when no header is detected, so that the
    # concatenation below also works for header-less files (the name was
    # previously unbound in that case).
    header = []
    if u.has_header(cur_list):
        header = [cur_list[0]]
        cur_list = cur_list[1:]

    shuffle(cur_list)
    cur_list = header + cur_list
    u.save_list(cur_list, out_path)
    u.log(f"Shuffled file saved in {out_path}")
    if open_out:
        u.startfile(out_path)
    u.log("[toolShuf] shuffle_file: end")
Esempio n. 7
0
def finish_find_dup(dup_list, out_path, open_out):
    """Logs the outcome of a duplicate search and saves the duplicates found.

    Nothing is saved when 'dup_list' is empty.

    - dup_list: list of duplicates
    - out_path: path where the duplicates are saved (with u.save_csv)
    - open_out: if truthy, the output file is opened with u.startfile
    """

    n_dup = len(dup_list)
    if n_dup == 0:
        u.log("No duplicates found")
        return

    u.log(f"{u.big_number(n_dup)} duplicates found")
    u.log_example(dup_list)

    u.save_csv(dup_list, out_path)
    u.log(f"List of duplicates saved in {out_path}")

    if not open_out:
        return
    u.startfile(out_path)
Esempio n. 8
0
def finish(out_path, start_time):
    """Logs the filtering counters, saves gl.out_list and logs the end of filter.

    - out_path: path where gl.out_list is saved (with u.save_csv); the file
      is opened afterwards when gl.OPEN_OUT_FILE is set
    - start_time: passed to u.get_duration_string to build the duration text
    """

    u.log("Filtering over")
    n_read = u.big_number(gl.n_r)
    n_out = u.big_number(gl.n_o)
    u.log(
        f"{n_read} lines read in the input file and "
        f"{n_out} lines to be written in the output file"
    )

    u.log("Writing output file...")
    u.save_csv(gl.out_list, out_path)
    u.log(f"Output file saved in {out_path}")

    duration = u.get_duration_string(start_time)
    u.log(f"[toolFilter] filter: end ({duration})")
    u.log_print()

    if not gl.OPEN_OUT_FILE:
        return
    u.startfile(out_path)
Esempio n. 9
0
def finish(start_time):
    """Ends reqlist: optional duplicate check, end log, message box, file opening.

    - start_time: passed to u.get_duration_string; the first returned element
      is forwarded to st.msg_box together with gl.MIN_DUR_TRIGGER, the second
      one is the duration text used in the log
    """
    import partools.utils as u
    import partools.tools as to
    import partools.utils.sTools as st

    if gl.CHECK_DUP:
        u.log("Checking duplicates on the first column of the output file...")
        to.find_dup(gl.OUT_PATH, col=1)
        u.log_print('|')

    elapsed, elapsed_str = u.get_duration_string(start_time, True)
    end_msg = f"reqlist: end ({elapsed_str})"
    # The "[rl] " prefix is only added to the log, not to the message box text
    u.log(f"[rl] {end_msg}")

    if gl.MSG_BOX_END:
        st.msg_box(end_msg, "rl", elapsed, gl.MIN_DUR_TRIGGER)

    u.log_print()

    if not gl.OPEN_OUT_FILE:
        return
    u.startfile(gl.OUT_PATH)
Esempio n. 10
0
def finish_sbf(out_path, start_time):
    """Ends search_big_file: saves the lines around the match or logs a miss.

    When gl.FOUND is set, a slice of gl.cur_list centred on index
    gl.c_row - 1 and spanning gl.PRINT_SIZE // 2 elements on each side is
    saved in 'out_path' (and opened if gl.OPEN_OUT_FILE is set). Otherwise a
    "not found" message is logged.

    - out_path: path where the slice of gl.cur_list is saved
    - start_time: passed to u.get_duration_string to build the duration text
    """

    if gl.FOUND:
        half = gl.PRINT_SIZE // 2
        center = gl.c_row - 1
        # A negative start would be read as an offset from the end of the
        # list, hence the clamp to 0. An upper bound beyond the end of the
        # list is handled by slicing itself.
        low_i = max(0, center - half)
        high_i = center + half
        u.save_list(gl.cur_list[low_i:high_i], out_path)
        # The f-string is logged as is: calling str.format() on the already
        # formatted text would fail on (or alter) a path containing braces.
        u.log(f"Current list written in {out_path}")
        if gl.OPEN_OUT_FILE:
            u.startfile(out_path)
    else:
        bn = u.big_number(gl.c_main)
        s = (f"EOF reached ({bn} lines, {gl.c_list} temporary lists)"
             f", string '{gl.LOOK_FOR}' not found")
        u.log(s)

    dstr = u.get_duration_string(start_time)
    u.log(f"[toolBF] search_big_file: end ({dstr})\n")