def get_iutd_query():
    """Load the IUTD query file and return what ``u.load_txt`` yields for it.

    The file is looked up in ``gl.QUERY_DIR``: ``IUTD_TEST.sql`` when
    ``gl.TEST_IUTD`` is truthy, otherwise ``IUTD_`` followed by ``gl.DB``.
    """
    # NOTE(review): only the test file name carries an explicit ".sql"
    # suffix; presumably gl.DB already ends with it - confirm.
    if gl.TEST_IUTD:
        file_name = "IUTD_TEST.sql"
    else:
        file_name = f"IUTD_{gl.DB}"
    return u.load_txt(f"{gl.QUERY_DIR}{file_name}", False)
def file_match(in1, in2, del_dup=False, err=True, out_path=''):
    """Compares two files and outputs the diff if the files don't match.

    Note that the files are sorted before comparison.
    (more generic than run_dq but doesn't work for big files)

    - del_dup: if true, duplicates are deleted before comparison
    - err: if True, an exception is raised when the files don't match
    - out_path: specifies an output path for file comparison different
      from default

    Raises AssertionError when the files differ and err is true.
    """
    u.log("[dq] file_match: start")
    if not out_path:
        out_path = u.g.dirs['OUT'] + 'file_match_out.csv'

    u.log(f"Comparing files '{in1}' and '{in2}'...")
    l1, l2 = u.load_txt(in1), u.load_txt(in2)
    l1.sort()
    l2.sort()
    if del_dup:
        l1, l2 = del_dup_list(l1), del_dup_list(l2)

    res = l1 == l2
    u.log("Files match" if res else "Files don't match")

    if not res:
        f.diff_list(l1, l2, out_path)
        if err:
            u.startfile(out_path)
            # Raised explicitly (rather than through an `assert` statement)
            # so the check still fires when Python runs with -O.
            raise AssertionError(
                f"Files '{in1}' and '{in2}' don't match (diff: {out_path})"
            )

    u.log("[dq] file_match: end")
    u.log_print()
def extract_doc_from_file(path, out):
    """Append the comment and docstring lines of the file at path to out.

    out is modified in place. The path and a separator (the result of
    u.extend_str('', '-', 100)) are appended first, followed by every
    retained line, stripped. A line is retained when it contains '#',
    when it matches the one-line docstring pattern, when it contains a
    triple-quote delimiter, or when it sits between two such delimiters.

    If at least one line was retained, a closing separator and two empty
    strings are appended; otherwise the path and separator added at the
    start are removed again.
    """
    out.append(path)
    out.append(u.extend_str('', '-', 100))

    inside_docstring = False
    kept = 0
    for raw_line in u.load_txt(path):
        if '#' in raw_line or u.like(raw_line, '*"""*"""*'):
            # Line holding a '#', or a docstring opened and closed at once.
            keep = True
        elif '"""' in raw_line:
            # Opening or closing delimiter: flip the state, keep the line.
            inside_docstring = not inside_docstring
            keep = True
        else:
            keep = inside_docstring

        if not keep:
            continue
        out.append(raw_line.strip())
        kept += 1

    if kept > 0:
        out.append(u.extend_str('', '-', 100))
        out.append('')
        out.append('')
    else:
        # Nothing retained: drop the path and separator appended above.
        del out[-2:]
def set_query_var(query_in):
    """Validate the variable query and store it in ``gl.query_var``.

    ``query_in`` is used as the query itself unless it matches ``*.sql``,
    in which case it is passed to ``u.load_txt`` and the loaded content is
    used instead. Leading and trailing carriage returns, newlines and
    semicolons are stripped before the query is handed to ``check_var``
    and stored.
    """
    is_sql_file = u.like(query_in, "*.sql")
    raw_query = u.load_txt(query_in, False) if is_sql_file else query_in
    cleaned = raw_query.strip('\r\n;')
    check_var(cleaned)
    gl.query_var = cleaned
def recipients(check_internal):
    """Load and return the recipients found in the mail directory.

    The file path is ``gl.mail_dir + gl.RECIPIENTS``. The loaded content is
    passed to ``f.is_configured`` and, when ``check_internal`` is truthy,
    to ``f.check_internal`` before being returned.

    Raises Exception (message built from ``gl.S_MISSING``) when the file
    does not exist.
    """
    path = gl.mail_dir + gl.RECIPIENTS
    u.log(f"Getting recipients from {path}")
    if not exists(path):
        raise Exception(gl.S_MISSING.format('Recipients', path))

    loaded = u.load_txt(path)
    f.is_configured(loaded, path)
    if check_internal:
        f.check_internal(loaded)
    return loaded
def HTML(HTMLbody, var_dict):
    """Return the HTML body of the mail.

    - HTMLbody: body to use; when falsy, ``template.html`` is loaded from
      ``gl.mail_dir`` instead
    - var_dict: when truthy, it is applied to the body through
      ``u.replace_from_dict``

    Raises Exception (message built from ``gl.S_MISSING``) when the
    template is needed but does not exist.
    """
    body = HTMLbody
    if not body:
        template_path = gl.mail_dir + 'template.html'
        if not exists(template_path):
            raise Exception(gl.S_MISSING.format('Template', template_path))
        body = u.load_txt(template_path, list_out=False)
        u.log(f"HTML template {template_path} successfully loaded")

    if var_dict:
        body = u.replace_from_dict(body, var_dict)
        u.log("Template variables have been replaced")

    return body
def shuffle_file(in_path, out_path, open_out=False):
    """Shuffles the line order of a file using the native random package

    When the file has a header (according to u.has_header), the header
    stays on the first line and only the remaining lines are shuffled.

    - in_path: path of the file to shuffle
    - out_path: path the shuffled lines are saved to
    - open_out: if true, the output file is opened with u.startfile
    """
    u.log("[toolShuf] shuffle_file: start")
    cur_list = u.load_txt(in_path)

    # Keep the header apart as a (possibly empty) list so that files
    # without a header no longer fail on an unbound `header` variable.
    if u.has_header(cur_list):
        header, body = cur_list[:1], cur_list[1:]
    else:
        header, body = [], cur_list

    shuffle(body)
    u.save_list(header + body, out_path)
    u.log(f"Shuffled file saved in {out_path}")
    if open_out:
        u.startfile(out_path)
    u.log("[toolShuf] shuffle_file: end")
def del_dup(in_path, out_path, open_out=False):
    """Deletes the duplicates in in_path file

    When the file has a header (according to u.has_header), the first
    line is kept as is and only the following lines go through
    del_dup_list. The result is handed to finish_del_dup together with
    out_path and open_out.
    """
    from .finish import finish_del_dup

    u.log("[toolDup] del_dup: start")
    u.log(f"Deleting duplicates in file '{in_path}'...")
    lines = u.load_txt(in_path)
    u.log(f"File loaded, {u.big_number(len(lines))} lines to be analysed")

    unique_lines = (
        [lines[0]] + del_dup_list(lines[1:])
        if u.has_header(lines)
        else del_dup_list(lines)
    )

    finish_del_dup(unique_lines, out_path, open_out)
    u.log("[toolDup] del_dup: end")
def iutd_file(d_now):
    """Check d_now against the first line of the IUTD check file.

    Returns True, and sets ``gls.iutd`` to True, when the first line of
    the file at ``gl.iutd_path`` equals d_now. Returns False when the file
    is missing or when the values differ; the reason is logged.
    """
    if not exists(gl.iutd_path):
        u.log_print('|')
        u.log("Can't find IUTD check file")
        return False

    d_old = u.load_txt(gl.iutd_path)[0]
    if d_now == d_old:
        gls.iutd = True
        u.log("IUTD check OK")
        return True

    u.log_print('|')
    u.log("The date found in the check file doesn't match the current date")
    return False
def init_find_dup(in_path, out_path, col):
    """Prepare the input list and the output path of a duplicate search.

    - in_path: file to analyse
    - out_path: output path; when falsy, a default one is built under
      ``u.g.dirs['TMP'] + gl.TMP_FOLDER`` (the directory is created)
    - col: 0 to work on whole lines (file loaded with u.load_txt),
      otherwise the 1-based index of the column to extract from the file
      loaded with u.load_csv

    Returns the tuple (list to analyse, output path). The first element
    of the list is dropped when u.has_header reports a header.
    """
    if not out_path:
        tmp_dir = u.g.dirs['TMP'] + gl.TMP_FOLDER
        u.mkdirs(tmp_dir)
        out_path = tmp_dir + gl.TMP_OUT

    prefix = "Searching duplicates in "
    if col == 0:
        u.log(f"{prefix} file {in_path}")
        values = u.load_txt(in_path)
    else:
        u.log(f"{prefix}column no. {col} of file {in_path}")
        rows = u.load_csv(in_path)
        values = [row[col - 1] for row in rows]

    # NOTE(review): header stripping is applied to both input modes here;
    # confirm it is not meant for the column mode only.
    if u.has_header(values):
        values = values[1:]

    return (values, out_path)
def check_recover():
    """Decide whether a previous run should be recovered.

    Returns None when the file at ``gl.tmp_file_chunk`` does not exist.
    Otherwise the user is asked whether to recover (the question is only
    logged, and treated as accepted, when ``gl.TEST_RECOVER`` is truthy):

    - on "n", the file is removed and False is returned
    - else the first line of the file is converted to int and stored in
      ``gl.ref_chunk``, and True is returned; if that fails, the failure
      is reported through ``log.recover_fail``, the file is removed and
      False is returned
    """
    chunk_path = gl.tmp_file_chunk
    if not os.path.exists(chunk_path):
        return None

    question = "Injection running detected. Recover? (y/n)"
    if gl.TEST_RECOVER:
        u.log(question)
        u.log_print("y (TEST_RECOVER = True)")
    elif u.log_input(question) == "n":
        os.remove(chunk_path)
        return False

    content = u.load_txt(chunk_path)
    try:
        gl.ref_chunk = int(content[0])
    except Exception as e:
        log.recover_fail(e, chunk_path, content)
        os.remove(chunk_path)
        return False
    return True
def gen_query_list(rg_file_name):
    """Build ``gl.QUERY_LIST`` from a range file.

    The file path is ``gl.RANGE_DIR + rg_file_name + gl.FILE_TYPE``. Each
    element loaded from it yields one two-item list holding that element
    twice. The base query (``gl.query``) is logged afterwards.
    """
    range_path = gl.RANGE_DIR + rg_file_name + gl.FILE_TYPE
    gl.QUERY_LIST = [[item, item] for item in u.load_txt(range_path)]
    u.log(f"Range query detected. Base query:\n{gl.query}\n;")
def ast(in1, in2):
    """Assert that in1 and in2 are both found in the last sent content.

    The content is loaded from ``mail.gl.last_sent`` with u.load_txt.
    Raises AssertionError when either value is missing.
    """
    last_sent = u.load_txt(mail.gl.last_sent, False)
    assert all(expected in last_sent for expected in (in1, in2))
    u.log(f"'{in1}' and '{in2}' found in last_sent")