Example #1
0
def test_dq():
    """Run the dq test scenarios in sequence, then check the log.

    Every scenario is announced in the log with a dashed title line. The
    calls are order-dependent: several of them reuse the same output path
    (e.g. ``gl.OUT1``), and ``gl.OUT_FM`` is written by one ``file_match``
    call and compared by the next.
    """

    def section(title):
        # Announce the next scenario in the log.
        u.log_print(title, dashes=100)

    # Set up the log and the output directory for this run.
    u.init_log('test_dq', True)
    u.mkdirs(gl.OUT_DIR, True)
    u.log_print()

    # ttry receives the callable, an error constant and the call arguments
    # (presumably it expects the call to fail with that error -- confirm).
    section("Test dq no header")
    ttry(td.dq_t, g.E_MH, gl.IN_MH, gl.IN12, gl.OUT1)
    ttry(td.dq_t, g.E_DH, gl.IN11, gl.IN_DH, gl.OUT1)

    section("Test dup key")
    td.dq_t(gl.IN_DK, gl.IN12, gl.OUT1, tpd=True)

    # First call writes gl.OUT_FM; second call compares it to its reference.
    section("Test different files comparison")
    dq.file_match(gl.REF1_F, gl.REF2_F, err=False, out_path=gl.OUT_FM)
    dq.file_match(gl.OUT_FM, gl.REF_FDM)

    section("Test dq No. 1")
    set_1 = (gl.IN11, gl.IN12, gl.OUT1)
    td.dq_t(*set_1, gl.REF1, 100, gl.REF_DUP1, sl=10)
    td.dq_t(*set_1, gl.REF1, 15, gl.REF_DUP1)
    td.dq_t(*set_1, gl.REF1_E, eq=True)

    section("Test dq No. 2")
    set_2 = (gl.IN21, gl.IN22, gl.OUT2)
    td.dq_t(*set_2, gl.REF2, 100, gl.REF_DUP2, 2)
    td.dq_t(*set_2, gl.REF2, 15, gl.REF_DUP2, 2)
    td.dq_t(*set_2, gl.REF2_E, eq=True)

    section("Test dq No. 3")
    set_3 = (gl.IN31, gl.IN32, gl.OUT3)
    td.dq_t(*set_3, gl.REF3, 15)
    td.dq_t(*set_3, gl.REF3_E, eq=True)
    td.dq_t(*set_3, gl.REF3, 100, tps=True, mls=6)
    td.file_match(gl.REF_SPLIT_3, gl.OUT_SPLIT_3)

    u.check_log(td.CL)
Example #2
0
def init_globals():
    """Create the footprint-specific temp directory and derive output paths.

    The directory is ``u.g.dirs['TMP'] + gl.TMP_FOLDER + gl.footprint + '/'``.
    ``gl.OUT_LEFT``, ``gl.OUT_RIGHT`` and ``gl.OUT_SQL`` are set to the
    corresponding ``gl.*_FILE`` names prefixed with that directory.
    """
    # Called before gl.footprint is read (presumably it sets it -- confirm).
    get_footprint()

    work_dir = u.g.dirs['TMP'] + gl.TMP_FOLDER + gl.footprint + '/'
    u.mkdirs(work_dir)

    gl.OUT_LEFT = work_dir + gl.OUT_LEFT_FILE
    gl.OUT_RIGHT = work_dir + gl.OUT_RIGHT_FILE
    gl.OUT_SQL = work_dir + gl.OUT_SQL_FILE
Example #3
0
def init(kwargs):
    """Initialise the ``gl`` state, the temp directory and the connection.

    Args:
        kwargs: Passed together with ``gl`` to ``u.init_kwargs``.

    Side effects (all on ``gl``): the three counters are reset to 0, the
    object returned by ``connect()`` is stored in ``gl.cnx`` and its cursor
    in ``gl.c``, and ``gl.data`` is reset to an empty list.
    """
    # Imported inside the function, as in the original code.
    from .connect import connect
    from .init import init_gl

    u.init_kwargs(gl, kwargs)
    init_gl()
    u.mkdirs(gl.TMP_DIR)

    # Counters are reset before the connection is opened.
    gl.ref_chunk = gl.c_main = gl.c_chunk = 0

    connection = connect()
    gl.cnx = connection
    gl.c = connection.cursor()
    gl.data = []
Example #4
0
def move_tmp_folder():
    """Move every entry listed in ``gl.TMP_DIR`` into ``gl.OUT_DIR``.

    ``gl.MERGE_OK`` is set to False first, and progress is reported through
    ``u.log``.
    """
    gl.MERGE_OK = False
    dest_dir = gl.OUT_DIR

    u.mkdirs(dest_dir, True)
    u.log(f"Output folder {dest_dir} created")

    names = u.list_files(gl.TMP_DIR, False)
    u.log(f"Moving {len(names)} files to the output folder....")
    for name in names:
        # Paths are built by plain string concatenation; this assumes both
        # directory strings end with a separator and that list_files returns
        # bare file names -- confirm against u.list_files.
        move(gl.TMP_DIR + name, dest_dir + name)
    u.log(f"Files moved to {dest_dir}")
Example #5
0
def test_tools():
    """Run the tools test scenarios in sequence, then check the log.

    Every scenario is announced in the log with a dashed title line. Several
    scenarios write to ``gl.DUP_OUT`` and compare it to a reference file, so
    the order of the calls matters.
    """

    def section(title):
        # Announce the next scenario in the log.
        u.log_print(title, dashes=100)

    # Set up the log and the output directory for this run.
    u.init_log('test_tools', True)
    u.mkdirs(gl.OUT_DIR, True)
    u.log_print()

    section("Test tools.xml")
    tt.parse_xml()
    dq.file_match(gl.XML_OUT, gl.XML_OUT_REF)

    section("Test toolSplit")
    tt.split()

    section("Test toolDup - to.find_dup simple")
    to.find_dup(gl.DUP_IN, gl.DUP_OUT)
    u.log_print()
    dq.file_match(gl.DUP_OUT, gl.DUP_OUT_REF)

    section("Test toolDup - to.find_dup col")
    to.find_dup(gl.DUP_COL_IN, col=1)
    u.log_print()
    # NOTE(review): no output path is passed to find_dup above, yet the
    # comparison below reads gl.DUP_OUT, which the previous scenario wrote.
    # Confirm this check really exercises the "col" run.
    dq.file_match(gl.DUP_OUT, gl.DUP_OUT_REF)

    section("Test toolDup - to.del_dup + shuffle")
    to.shuffle_file(gl.DUP_IN, gl.SHUF_OUT)
    u.log_print()
    to.del_dup(gl.SHUF_OUT, gl.DUP_OUT)
    u.log_print()
    dq.file_match(gl.DUP_OUT, gl.DEL_DUP_OUT_REF)

    section("Test toolDup - to.find_dup_list")
    rows = u.load_csv(gl.DUP_IN)
    duplicates = to.find_dup_list(rows)
    u.save_csv(duplicates, gl.DUP_OUT)
    dq.file_match(gl.DUP_OUT, gl.DUP_OUT_REF)

    section("Test toolFilter")
    tt.flt()

    section("Test BF")
    tt.read_big_file()
    tt.search_big_file()
    bf.sort_big_file(ts.gl.IN, gl.SORT_BF_OUT)
    dq.file_match(ts.gl.IN, gl.SORT_BF_OUT, del_dup=True)

    u.check_log(tt.CL)
Example #6
0
def init_find_dup(in_path, out_path, col):
    """Load the values to scan for duplicates and resolve the output path.

    Args:
        in_path: Path of the input file.
        out_path: Path of the output file. When falsy, a default path
            (``gl.TMP_OUT`` inside ``u.g.dirs['TMP'] + gl.TMP_FOLDER``) is
            used and that folder is created through ``u.mkdirs``.
        col: 0 to load the file with ``u.load_txt``; otherwise the 1-based
            index of the column to extract from the ``u.load_csv`` rows.

    Returns:
        A ``(values, out_path)`` tuple.
    """
    if not out_path:
        default_dir = u.g.dirs['TMP'] + gl.TMP_FOLDER
        u.mkdirs(default_dir)
        out_path = default_dir + gl.TMP_OUT

    prefix = "Searching duplicates in "
    if col != 0:
        u.log(f"{prefix}column no. {col} of file {in_path}")
        values = [row[col - 1] for row in u.load_csv(in_path)]
        # Drop the first entry when u.has_header reports a header.
        if u.has_header(values):
            values = values[1:]
    else:
        # NOTE(review): this message contains a double space ("in  file");
        # kept as is because the log text may be compared by tests.
        u.log(f"{prefix} file {in_path}")
        values = u.load_txt(in_path)

    return (values, out_path)
Example #7
0
def recover():
    """Offer to resume from files found in ``gl.TMP_DIR``.

    Returns immediately when the directory listing is empty. Otherwise the
    user is asked whether to recover; when ``gl.TEST_RECOVER`` is set the
    question is only logged and treated as answered "y". Answering 'n' calls
    ``u.mkdirs(gl.TMP_DIR, True)`` and returns. In all other cases
    ``modify_ql`` is called with the listed files and the first remaining
    query is logged.
    """
    pending = u.list_files(gl.TMP_DIR, False)
    if len(pending) == 0:
        return

    prompt = "Work in progress detected. Recover? (y/n)"
    if not gl.TEST_RECOVER:
        if u.log_input(prompt) == 'n':
            u.mkdirs(gl.TMP_DIR, True)
            return
    else:
        # Test mode: no input is read.
        u.log(prompt)
        u.log_print("y (TEST_RECOVER = True)")

    modify_ql(pending)
    u.log("Query list modified according previous work in progress. "
          f"Recovering from query '{gl.QUERY_LIST[0][1]}'.")
Example #8
0
def test_rl():
    """Run the rl test scenarios in sequence, then check the log.

    Returns right after initialising the log when
    ``ts.is_test_db_defined()`` is falsy. Every scenario is announced in the
    log with a dashed title line. The calls are order-dependent: later steps
    read files written by earlier ones (e.g. ``gl.OUT_1``, ``gl.OUT_2``).
    """

    def section(title):
        # Announce the next scenario in the log.
        u.log_print(title, dashes=100)

    u.init_log('test_rl', True)
    if not ts.is_test_db_defined():
        return

    for folder in (gl.TMP_DIR, ts.gl.TMP_DIR, gl.OUT_DIR):
        u.mkdirs(folder, True)
    u.log_print()

    section('Test join')
    tr.left_join_files(gl.LEFT_1, gl.RIGHT_1, gl.OUT_JOIN_REF_1)
    tr.left_join_files(gl.LEFT_2, gl.RIGHT_2, gl.OUT_JOIN_REF_2)
    tr.left_join_files(gl.LEFT_3, gl.RIGHT_3, gl.OUT_JOIN_REF_3)

    # Upload the input file, then save the first field of each of its rows
    # as the input list for the following scenarios.
    section('Preparing DB')
    ts.upload(ts.gl.IN)
    first_fields = [row[0] for row in u.load_csv(ts.gl.IN)]
    u.save_csv(first_fields, gl.IN_1)

    # t.ttry receives the callable, an error constant and the call arguments
    # (presumably it expects the call to fail with that error -- confirm).
    section('Test rl - no sql output')
    t.ttry(tr.reqlist, u.g.E_VA, gl.IN_1, gl.OUT_1, gl.QUERY_NO)

    section('Test rl - no var in query')
    t.ttry(tr.reqlist, u.g.E_MV, gl.IN_1, gl.OUT_1, gl.QUERY_MV)

    # Same list without its first entry.
    section('Test rl - missing header')
    u.save_csv(first_fields[1:], gl.IN_MH)
    t.ttry(tr.reqlist, u.g.E_MH, gl.IN_MH, gl.OUT_1, gl.QUERY_1)

    section('Test rl - standard')
    tr.reqlist(gl.IN_1, gl.OUT_1, gl.QUERY_1, cnx=1)
    tr.reqlist(gl.OUT_1, gl.OUT_2, gl.QUERY_2)
    dq.file_match(ts.gl.IN, gl.OUT_2, del_dup=True)
    dq.file_match(t.gl.OUT_DUP_TMP, gl.OUT_DUP_REF)

    section('Test rl - interuption and recovery')
    u.mkdirs(gl.TMP_DIR, True)
    u.log_print()
    tr.reqlist_interrupted(gl.OUT_1, gl.OUT_3, gl.QUERY_2, cnx=6)
    tr.reqlist(gl.OUT_1, gl.OUT_3, gl.QUERY_2, True, cnx=6)
    dq.file_match(gl.OUT_2, gl.OUT_3)

    ts.clean_db([ts.gl.T_TEST])

    u.check_log(tr.CL)
Example #9
0
def save_mail(HTMLbody):
    """Save ``HTMLbody`` as ``last_sent.html`` inside ``gl.mail_dir``.

    Args:
        HTMLbody: Content passed to ``u.save_list`` as a one-element list.

    The resulting path is also stored in ``gl.last_sent`` and logged.
    """
    u.mkdirs(gl.mail_dir)

    target = gl.mail_dir + 'last_sent.html'
    gl.last_sent = target

    u.save_list([HTMLbody], target)
    u.log(f"Mail saved to {target}")
Example #10
0
def reset():
    """Call ``u.mkdirs(..., True)`` on ``gl.TMP_DIR`` then ``gl.OUT_DIR``.

    The start and the end of the operation are logged.
    """
    u.log("Resetting folders...")
    for folder in (gl.TMP_DIR, gl.OUT_DIR):
        u.mkdirs(folder, True)
    u.log("Reset over\n")
Example #11
0
def init_tmp_dir():
    """Set ``gl.TMP_DIR`` and pass it to ``u.mkdirs`` with ``True``.

    The path is ``u.g.dirs['TMP'] + gl.TMP_FOLDER``.
    """
    tmp_dir = u.g.dirs['TMP'] + gl.TMP_FOLDER
    gl.TMP_DIR = tmp_dir
    u.mkdirs(tmp_dir, True)