예제 #1
0
def prompt_dup_key(n_dup_key):
    """Warn about duplicated research keys and ask how to proceed.

    Logs the number of duplicates (n_dup_key) followed by an example taken
    from gl.dup_key_list, then gets a one-letter command: read through
    u.log_input, or forced to 'c' when gl.TEST_PROMPT_DK is truthy.

    - 'a' or 'c': gl.dup_key_list is saved in gl.OUT_DUP_KEY_FILE
    - 'a' or 'b': the program exits through sys.exit()
    - any other input: execution simply continues
    """
    u.log_print('|')
    count_str = u.big_number(n_dup_key)
    u.log(f"Warning: {count_str} different lines with the same research key were identified")
    u.log_example(gl.dup_key_list)

    menu = ("\nFile comparison may not work correctly. Here are your options:"
            "\na -> save duplicates list and quit"
            "\nb -> quit without saving duplicates list"
            "\nc -> save duplicates list and continue"
            "\nd -> continue without saving duplicates list")
    if not gl.TEST_PROMPT_DK:
        answer = u.log_input(menu)
    else:
        # Test mode: the menu is only displayed and 'c' is chosen automatically
        u.log_print(menu)
        u.log_print('c (TEST_PROMPT_DK = True)')
        answer = 'c'
    u.log_print('|')

    if answer in ('a', 'c'):
        u.save_csv(gl.dup_key_list, gl.OUT_DUP_KEY_FILE)
        u.log(f"List of key duplicates written in file {gl.OUT_DUP_KEY_FILE}")
    if answer in ('a', 'b'):
        sys.exit()
예제 #2
0
def log_prepare(ar, bn_ar):
    """Log that the array was prepared, then show an example of duplicates.

    - ar: value inserted in the message after "saved in"
    - bn_ar: line count, inserted in the message as given

    The number of dismissed duplicates is the length of gl.dup_list.
    """
    dup_count = u.big_number(len(gl.dup_list))
    u.log(f"Array prepared and saved in {ar} ({bn_ar} lines, {dup_count} duplicates dismissed)")
    u.log_example(gl.dup_list)
예제 #3
0
def prepare_bdd():
    """Run the DB preparation step when gl.EXECUTE_KWARGS is set.

    The content of gl.EXECUTE_KWARGS is forwarded to execute() as keyword
    arguments. Nothing is logged nor executed when it is falsy.
    """
    from .execute import execute

    if not gl.EXECUTE_KWARGS:
        return

    u.log("Preparing DB before data injection...")
    u.log_print("|")
    execute(**gl.EXECUTE_KWARGS)
예제 #4
0
def group_by():
    """Aggregate the output file on its first volume field.

    Returns at once when the header of gl.OUT_PATH holds no field accepted
    by is_vol_field. Otherwise gl.COUNT is set to True and, provided that
    gl.MERGE_OK and gl.range_query are both truthy, the output file is
    rewritten in place:

    - with other fields in the header, rows are grouped on them with
      pandas, summed, and sorted on the volume field in descending order
    - with no other field, the first column of every data row is summed
      into a single row
    """
    target = gl.OUT_PATH
    header = u.get_header(target, True)
    count_fields = [name for name in header if is_vol_field(name)]
    if not count_fields:
        return

    gl.COUNT = True
    count_field = count_fields[0]
    if not (gl.MERGE_OK and gl.range_query):
        return

    u.log('Group by on output file...')

    rows = u.load_csv(target)
    key_fields = [name for name in header if not is_vol_field(name)]
    if not key_fields:
        # Simple count result without group by statement: the results of
        # the different files are directly summed (pandas not needed)
        total = sum(int(row[0]) for row in rows[1:])
        u.save_csv([rows[0], [str(total)]], gl.OUT_PATH)
    else:
        import pandas as pd
        frame = pd.DataFrame(data=rows[1:], columns=header)
        frame[count_field] = frame[count_field].astype(int)
        grouped = frame.groupby(by=key_fields).sum()
        ordered = grouped.sort_values(by=count_field, ascending=False)
        ordered.to_csv(path_or_buf=gl.OUT_PATH, sep=';', encoding='UTF-8')
    u.log('Group by over')
예제 #5
0
def gen_query_list():
    """Build the list of grouped queries and store it in gl.query_list.

    The base query is read from gl.QUERY_IN into gl.query_var and checked
    with check_var. The elements prepared from gl.ar_in are then split into
    consecutive groups of at most gl.NB_MAX_ELT_IN_STATEMENT elements.

    Each entry of gl.query_list is a [group, number] pair: group is built
    by gen_group and number is the 1-based group index formatted by
    u.extend_str with '0' and the digit count of the highest index
    (presumably left zero-padding - see u.extend_str).

    An empty element list yields an empty gl.query_list.
    """
    u.log("Building query list to be input in sql.dowload...")

    gl.query_var = sql.get_query(gl.QUERY_IN)
    check_var(gl.query_var)
    u.log_print(f"Base query:\n{gl.query_var}\n;")

    elt_list = prepare_elt_list(gl.ar_in)
    n_grp = math.ceil(len(elt_list) / gl.NB_MAX_ELT_IN_STATEMENT)
    # Number of digits of the highest group index. len(str()) is used rather
    # than log10 because log10(0) raises a math domain error when the
    # element list is empty.
    size_elt_list = len(str(n_grp))

    query_list = []

    def _flush(group_elts):
        # Number the group (1-based) and append it to the query list
        n_str = u.extend_str(len(query_list) + 1, '0', size_elt_list, True)
        grp = gen_group(group_elts)
        query_list.append([grp, n_str])

    cur_elt_list = []
    for elt in elt_list:
        cur_elt_list.append(elt)
        if len(cur_elt_list) % gl.NB_MAX_ELT_IN_STATEMENT == 0:
            _flush(cur_elt_list)
            cur_elt_list = []
    if cur_elt_list:
        # Last, incomplete group
        _flush(cur_elt_list)

    gl.query_list = query_list
    log_gen_query_list(elt_list, query_list)
예제 #6
0
def get_cnx_info():
    """Resolve the connection info to use and log where it comes from.

    Sources are tried in this order:

    1. gl.CNX_INFO, when truthy
    2. cfg.CONF_ORACLE[(gl.DB, gl.ENV)]
    3. cfg.CONF_ORACLE[gl.DB]

    When none applies, an Exception is raised with gl.E_1 (gl.DB not set),
    gl.E_2 (gl.ENV not set) or gl.E_3 (any other case).
    """
    if gl.CNX_INFO:
        cnx_info = gl.CNX_INFO
        u.log(gl.S_1.format(cnx_info))
        return cnx_info

    db_env = (gl.DB, gl.ENV)
    if db_env in cfg.CONF_ORACLE:
        cnx_info = cfg.CONF_ORACLE[db_env]
        u.log(gl.S_2.format(gl.DB, gl.ENV, cnx_info))
        return cnx_info

    if gl.DB in cfg.CONF_ORACLE:
        cnx_info = cfg.CONF_ORACLE[gl.DB]
        u.log(gl.S_3.format(gl.DB, cnx_info))
        return cnx_info

    # No connection info could be found: pick the matching error message
    if not gl.DB:
        error_msg = gl.E_1
    elif not gl.ENV and gl.DB not in cfg.CONF_ORACLE:
        error_msg = gl.E_2.format(gl.DB)
    else:
        error_msg = gl.E_3.format(gl.DB, gl.ENV)
    raise Exception(error_msg)
예제 #7
0
파일: bf.py 프로젝트: paularnaud2/ParTools
def read_big_file(in_path, **kwargs):
    """Prints the content of a potentially big file piece by piece

    Each value returned by f.read_file (presumably one line) is sent to
    u.log_print. After every read following the first one, gl.c_read is
    incremented and reading stops as soon as f.check_counter returns a
    falsy value or an empty string has been read.

    See in partools/tools/gl for other parameters (kwargs)

    See partools/quickstart/tools_bf.py for examples of use
    """
    from .init import init_rbf

    u.log("[toolBF] read_big_file: start")
    init_rbf()
    u.init_kwargs(gl, kwargs)
    with open(in_path, 'r', encoding='utf-8', errors='ignore') as in_file:
        cur_line = f.read_file(in_file)
        u.log_print(cur_line.strip("\n"))
        while cur_line != "":
            cur_line = f.read_file(in_file)
            u.log_print(cur_line.strip("\n"))
            gl.c_read += 1
            if not f.check_counter(in_file):
                break

    u.log("[toolBF] read_big_file: end\n")
예제 #8
0
def no_auth(mail_name,
            subject,
            var_dict=None,
            attachments=None,
            HTMLbody='',
            recipients=None,
            decrypt_key=''):
    """Sends emails using a no authentication smtp server

    See README.md for guidance

    See partools/quickstart/mail.py for examples of use

    - mail_name: name of the mail, passed to f.init
    - subject: subject of the mail
    - attachments: list of absolute path for attached files
    - var_dict: dictionary of variables to be replaced in HTMLbody
    - HTMLbody: if not input, mails/mail_name/template.html is taken
    - recipients: if not input, mails/mail_name/recipients.txt is taken
    - decrypt_key: passed to f.init_cfi

    var_dict, attachments and recipients default to None, which stands for
    "empty".
    """
    from partools import cfg

    # A fresh empty list is created on every call. The former `[]` defaults
    # were single objects shared by all calls, so anything downstream that
    # kept or mutated them would leak state into later calls. An empty list
    # is still what the helpers receive when nothing is input.
    var_dict = [] if var_dict is None else var_dict
    attachments = [] if attachments is None else attachments
    recipients = [] if recipients is None else recipients

    f.init(mail_name, recipients, True)
    f.init_cfi(decrypt_key)
    msg = get.msg(subject, HTMLbody, attachments, var_dict)

    u.log(f"Sending mail '{mail_name}' to {gl.recipients}...")
    with smtplib.SMTP(cfg.HOST_NO_AUTH) as server:
        server.sendmail(gl.sender, gl.recipients, msg.as_string())
    u.log('Mail sent')
예제 #9
0
def is_test_db_defined():
    """Tell whether pt.cfg.CONF_ORACLE is set.

    Returns True when it is truthy. Otherwise the reason is both logged and
    issued as a warning, and False is returned.
    """
    if pt.cfg.CONF_ORACLE:
        return True

    msg = "cfg.CONF_ORACLE not defined. Test aborted."
    u.log(msg)
    warnings.warn(msg)
    return False
예제 #10
0
def finish_this(start_time):
    """Release the resources of the job and log its closing messages.

    Closes gl.cnx and removes the file gl.tmp_file_chunk, then logs
    gl.c_main as a number of lines and the duration since start_time.
    """
    gl.cnx.close()
    os.remove(gl.tmp_file_chunk)

    n_lines = u.big_number(gl.c_main)
    elapsed = u.get_duration_string(start_time)
    for msg in (f"{n_lines} lines exported", f"[sql] upload: end ({elapsed})"):
        u.log(msg)
예제 #11
0
def init_cfi(decrypt_key=''):
    """Load the confidential settings and set the mail sender from them.

    gl.cfi receives the result of u.get_confidential(decrypt_key, False);
    gl.sender and gl.From are both set to its 'MAIL_FROM' entry.

    Raises an Exception carrying gl.S_MISSING_CFI when nothing is returned.
    A missing 'PWD_GMAIL' or 'MAIL_FROM' entry raises the lookup error of
    gl.cfi (KeyError if it is a dict).
    """
    gl.cfi = u.get_confidential(decrypt_key, False)
    if not gl.cfi:
        raise Exception(gl.S_MISSING_CFI)

    # The password entry is still required (same failure as before when it
    # is absent), but its value is deliberately kept out of the log: secrets
    # must never be written there in clear text.
    _ = gl.cfi['PWD_GMAIL']
    u.log("Password decrypted")

    gl.sender = gl.cfi['MAIL_FROM']
    gl.From = gl.cfi['MAIL_FROM']
예제 #12
0
def check_var(query):
    """Make sure the query holds the variable to be replaced.

    The expected variable is gl.VAR_IN surrounded by u.g.VAR_DEL. When it
    is missing, the query is logged and an Exception carrying u.g.E_MV is
    raised.
    """
    delimiter = u.g.VAR_DEL
    expected = delimiter + gl.VAR_IN + delimiter
    if expected in query:
        return

    u.log(f"Error: query must contain {expected}")
    u.log_print("Query:")
    u.log_print(query)
    raise Exception(u.g.E_MV)
예제 #13
0
def finish_del_dup(out_list, out_path, open_out):
    """Save the list without duplicates and optionally open the output file.

    - out_list: list written to out_path with u.save_list
    - out_path: path of the output file
    - open_out: when truthy, out_path is passed to u.startfile
    """
    u.log(f"Saving list without duplicates in '{out_path}'...")
    u.save_list(out_list, out_path)
    u.log(f"List saved, it has {u.big_number(len(out_list))} lines")
    if open_out:
        u.startfile(out_path)
예제 #14
0
def check_ec(file_list):
    """Look for an EC file in file_list and abort the merge if there is one.

    An element counts as an EC file when it contains gl.EC. On the first
    match, a message is logged, gl.MERGE_OK is set to False and True is
    returned. Returns False when no element matches.
    """
    ec_file = next((name for name in file_list if gl.EC in name), None)
    if ec_file is None:
        return False

    u.log(f"EC file found ({ec_file})."
          " Meging of temporary files aborted.")
    gl.MERGE_OK = False
    return True
예제 #15
0
def init(kwargs):
    """Initialise the job and load the input array into gl.ar_in.

    kwargs are applied to gl through u.init_kwargs and init_globals is run.
    The file gl.IN_PATH is then passed to u.check_header and loaded with
    u.load_csv.
    """
    u.init_kwargs(gl, kwargs)
    init_globals()

    in_path = gl.IN_PATH
    u.check_header(in_path)
    u.log(f"Loading input array from '{in_path}'...")
    gl.ar_in = u.load_csv(in_path)
    u.log("Input array loaded")
    u.log_print('|')
예제 #16
0
def connect():
    """Open a DB connection and return it.

    Runs init_instant_client, connects with the info returned by
    get_cnx_info, logs the success and finally passes the new connection to
    is_up_to_date.
    """
    init_instant_client()
    connection = connect_with(get_cnx_info())
    u.log("Connected")
    is_up_to_date(connection)
    return connection
예제 #17
0
def log_gen_query_list(elt_list, group_list):
    """Log a summary of the query list that has just been built.

    - elt_list: its length is reported as the number of elements
    - group_list: its length is reported as the number of groups

    gl.NB_MAX_ELT_IN_STATEMENT and gl.MAX_DB_CNX are reported as well.
    """
    n_elts = u.big_number(len(elt_list))
    n_groups = u.big_number(len(group_list))
    parts = [
        f"Query list built: {n_elts} elements to be processed distributed",
        f" in {n_groups} groups ({gl.NB_MAX_ELT_IN_STATEMENT} max per group).",
        f" They will be processed in parallel by {gl.MAX_DB_CNX} connection pools.",
    ]
    u.log("".join(parts))
예제 #18
0
def finish_xml(out_path, start_time):
    """Log the end of parse_xml and optionally open the output file.

    The message reports gl.N_WRITE as a number of lines and the duration
    since start_time. out_path is passed to u.startfile when
    gl.OPEN_OUT_FILE is truthy.
    """
    elapsed = u.get_duration_string(start_time)
    n_written = u.big_number(gl.N_WRITE)
    u.log(f"[toolParseXML] parse_xml: end ({n_written} lines written in {elapsed})")
    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(out_path)
예제 #19
0
파일: log.py 프로젝트: paularnaud2/ParTools
def inject():
    """Log the start of the data injection.

    When gl.ref_chunk is not 0, the message also gives the line the
    injection recovers from, computed as
    gl.ref_chunk * gl.NB_MAX_ELT_INSERT.
    """
    recovery = ""
    if gl.ref_chunk != 0:
        start_line = u.big_number(gl.ref_chunk * gl.NB_MAX_ELT_INSERT)
        recovery = f" (recovering from line {start_line})"
    u.log("Injecting data in DB" + recovery + "...")
예제 #20
0
def init_dq(kwargs):
    """Initialise a run_dq job.

    Logs the start, applies kwargs to gl through u.init_kwargs, runs
    init_tmp_dir and set_paths, then logs the two input files
    gl.paths['in1'] and gl.paths['in2'].
    """
    u.log("[dq] run_dq: start")
    u.init_kwargs(gl, kwargs)
    init_tmp_dir()
    set_paths()

    in1, in2 = gl.paths['in1'], gl.paths['in2']
    u.log(f"run_dq job initialised. Input files {in1} and {in2}"
          " are going to be sorted and compared.")
    u.log_print('|')
예제 #21
0
def finish_dq(start_time):
    """Log the end of run_dq, then notify and open the output if requested.

    - gl.MSG_BOX_END truthy: st.msg_box is called with the end message, the
      duration and gl.MIN_DUR_TRIGGER
    - gl.OPEN_OUT_FILE truthy: gl.paths["out"] is passed to u.startfile
    """
    # With True as second argument, get_duration_string returns a pair; its
    # first item is presumably the raw duration handed to msg_box
    duration, duration_str = u.get_duration_string(start_time, True)
    end_msg = f"[dq] run_dq: end ({duration_str})"
    u.log(end_msg)
    if gl.MSG_BOX_END:
        st.msg_box(end_msg, "dq", duration, gl.MIN_DUR_TRIGGER)
    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(gl.paths["out"])
예제 #22
0
def diff_list(list1, list2, out_path):
    """Save the elements that are present in only one of the two lists.

    - list1, list2: lists to compare
    - out_path: output file; when falsy, 'file_match_out.csv' in
      u.g.dirs['OUT'] is used

    Elements only in list1 come first, followed by elements only in list2.
    The result goes through to.del_dup_list before being saved with
    u.save_list.
    """
    if not out_path:
        out_path = u.g.dirs['OUT'] + 'file_match_out.csv'

    # Membership tests against sets keep the comparison linear instead of
    # quadratic. Lists holding unhashable elements fall back to the plain
    # (slower) list lookup.
    try:
        lookup1, lookup2 = set(list1), set(list2)
    except TypeError:
        lookup1, lookup2 = list1, list2

    out1 = [e for e in list1 if e not in lookup2]
    out2 = [e for e in list2 if e not in lookup1]
    out = to.del_dup_list(out1 + out2)
    u.save_list(out, out_path)
    u.log(f"Comparison result available here: {out_path}")
예제 #23
0
파일: csf.py 프로젝트: paularnaud2/ParTools
def finish(out_path):
    """Log the final counters once the output file has been generated.

    Reports gl.c_1 and gl.c_2 as lines read in files 1 and 2, and gl.c_out
    as lines written in the output file.
    """
    n_out = u.big_number(gl.c_out)
    n_in1 = u.big_number(gl.c_1)
    n_in2 = u.big_number(gl.c_2)
    report_lines = [
        f"Output file successfully generated in {out_path}",
        f"\t\t{n_in1} lines read in file 1",
        f"\t\t{n_in2} lines read in file 2",
        f"\t\t{n_out} lines written in output file",
    ]
    u.log("\n".join(report_lines))
예제 #24
0
def init_array_list():
    """Initialise gl.array_list and log the capacity of the buffer array.

    gl.array_list gets gl.c_file empty lists (gl.c_file is assumed to be an
    integer), and never fewer than one. gl.c_row_max is logged as the
    maximum number of lines.
    """
    gl.array_list = [[] for _ in range(max(1, gl.c_file))]
    u.log(f"Buffer array initialised. It can hold a maximum of {gl.c_row_max} lines.")
예제 #25
0
def ttry(f, e_ref, *args, **kwargs):
    """Assert that f(*args, **kwargs) raises the expected exception.

    - f: callable under test
    - e_ref: reference the exception message is compared to with u.like

    An AssertionError is raised when the message does not match e_ref or
    when no exception occurs at all.
    """
    try:
        f(*args, **kwargs)
    except Exception as err:
        assert u.like(str(err), e_ref)
        u.log(f"[ttry] Exception caught match expected ('{e_ref}')")
        raised = True
    else:
        raised = False

    assert raised
예제 #26
0
def rewrite_tmp_file(tmp_file_list, tmp_file_path, n_written_rows):
    """Rewrite the tmp file without the lines written in buffer array.

    - tmp_file_list: lines of the tmp file
    - tmp_file_path: path of the tmp file
    - n_written_rows: number of leading lines to leave out

    When tmp_file_list is empty, the tmp file is deleted instead.
    """
    if not tmp_file_list:
        # Void tmp file: nothing to rewrite
        os.remove(tmp_file_path)
        u.log(f"Deleting temporary file no. {gl.c_col}")
        return

    with open(tmp_file_path, 'w', encoding='utf-8') as tmp_file:
        tmp_file.writelines(tmp_file_list[n_written_rows:])
예제 #27
0
def is_up_to_date(cnx):
    """Run the IUTD (Is Up To Date) check for the DB gl.DB.

    Unless gl.TEST_IUTD is truthy, the check is skipped when gl.DB is not
    in gl.IUTD_LIST or when gls.iutd is truthy. Otherwise iutd_file is
    called with today's date and, only if it returns a falsy value,
    iutd_db is called with that date and cnx.
    """
    if not gl.TEST_IUTD and (gl.DB not in gl.IUTD_LIST or gls.iutd):
        return

    u.log(f"IUTD (Is Up To Date) check for DB {gl.DB}")
    today = datetime.now().strftime("%Y/%m/%d")
    if not iutd_file(today):
        iutd_db(today, cnx)
예제 #28
0
파일: log.py 프로젝트: paularnaud2/ParTools
def write_rows_finish(q_name, i, cnx_nb):
    """Log that every line of a query has been written.

    - q_name: name of the query; nothing is logged for 'MONO'
    - i: number of lines written
    - cnx_nb: connection number, added to the message unless
      gl.MAX_DB_CNX is 1 or cnx_nb is 0
    """
    n_lines = u.big_number(i)
    if q_name == 'MONO':
        return

    details = f"{n_lines} lines written"
    if not (gl.MAX_DB_CNX == 1 or cnx_nb == 0):
        details += f", connection no. {cnx_nb}"
    u.log(f"All lines written for query '{q_name}' ({details})")
예제 #29
0
def compare_headers(in1, in2):
    """Check that both input files have the same header.

    Returns True when the headers returned by u.get_header are equal.
    Otherwise an error is logged and an Exception carrying u.g.E_DH is
    raised.
    """
    header1, header2 = u.get_header(in1), u.get_header(in2)
    if header1 == header2:
        return True

    u.log(f"Error: files {in1} and {in2} don't have the same header."
          " Input files must have the same header.")
    raise Exception(u.g.E_DH)
예제 #30
0
def gen_cnx_dict(nb):
    """Create nb DB connections and store them in gl.cnx_dict.

    Connections are keyed by their number, from 1 to nb (assumed to be an
    integer). Each one is passed to is_up_to_date right after its
    creation.
    """
    init_instant_client()
    cnx_info = get_cnx_info()
    gl.cnx_dict = {}
    for cnx_nb in range(1, nb + 1):
        u.log(f'Creating connection no. {cnx_nb}...')
        connection = connect_with(cnx_info)
        gl.cnx_dict[cnx_nb] = connection
        is_up_to_date(connection)
        u.log(f'Connection no. {cnx_nb} created')