Example #1
0
def check_for_duplicates(paths, hash=hashlib.sha1, error_log=log):
    """Find files with identical content under *paths* and list them in a workbook.

    Every file returned by ``dl.dir_list`` for each entry of *paths* is hashed
    chunk by chunk. A file is identified by the pair ``(digest, size)``; the
    first file seen with a given identity is remembered, and every later file
    with the same identity is written to the sheet as one row:

    * column 1 - ``HYPERLINK`` formula pointing at the duplicate
    * column 2 - duplicate's file name
    * column 3 - duplicate's full path
    * column 5 - ``HYPERLINK`` formula pointing at the first-seen file
    * column 6 - first-seen file's name
    * column 7 - first-seen file's full path

    The workbook is saved into the hard-coded ``wb_save_path`` folder under
    the name ``<wb_rev>_<wb_save_name>_<timestamp>.xlsx``.

    Args:
        paths: Iterable of root directories to scan.
        hash: Zero-argument callable returning a hashlib-style object with
            ``update`` and ``digest`` (the name shadows the builtin but is
            kept because it is part of the public signature).
        error_log: Object exposing ``get_logger(name)``; the returned logger
            receives any exception raised while writing a row.
    """
    hashes = {}
    time_now = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    exclude = [
        "00_Archive", "00_archive", "01_Archive", "01_archive",
        "00_Document_templates", "01_Deleted lines", "02_Red_Corex",
        "03_Additional_Workfiles", "SKID", "Bare"
    ]

    wb_save_path = Path("N:\\DGAVRIC\\_ITTER")
    wb_save_name = "duplicated_files_proba"
    wb_rev = "02"  # workbook revision
    wb = Workbook()  # workbook
    ws = wb.active  # active sheet of the new workbook
    r = 1  # next row to write

    log_file = "error_logfile"
    logger = error_log.get_logger(f"{wb_save_name}_{log_file}")

    for path in paths:
        fn_list = dl.dir_list(path,
                              typ="f",
                              lookup="*",
                              extension="*",
                              exclude=exclude)
        for file in fn_list:
            hashobj = hash()
            # Close the handle as soon as hashing is done; leaving it to the
            # garbage collector leaks one descriptor per scanned file.
            with open(file, 'rb') as fobj:
                for chunk in chunk_reader(fobj):
                    hashobj.update(chunk)
            file_id = (hashobj.digest(), os.path.getsize(file))
            duplicate = hashes.get(file_id)
            if duplicate is None:
                # First time this content is seen: remember where it lives.
                hashes[file_id] = file
                continue

            try:
                ws.cell(r, 1, f'=HYPERLINK("{file}","Open")')
                ws.cell(r, 2, file.name)
                ws.cell(r, 3, str(file))  # file path
                ws.cell(r, 5, f'=HYPERLINK("{duplicate}","Open")')
                ws.cell(r, 6, duplicate.name)
                ws.cell(r, 7, str(duplicate))  # file path
                r += 1
            except Exception as error:
                # Best effort: a row that cannot be written is logged and
                # skipped so the rest of the scan still completes.
                logger.exception(f"{wb_save_name}_{file} --> {error}")

    wb.save(wb_save_path / f"{wb_rev}_{wb_save_name}_{time_now}.xlsx")
    wb.close()
Example #2
0
import re
from datetime import datetime
from pathlib import Path

from openpyxl import Workbook
from openpyxl.utils.cell import column_index_from_string, get_column_letter

import dir_list_r01 as dl

# Root folder that is scanned for directories.
root_path = Path(
    "J:\\32_IZ224_SIEMENS_Herne\\60_Construction\\20_Sx_Working\\50_Workfiles")
# Folder names handed to dir_list as exclusions.
exclude = [
    "00_Archive", "01_Archive", "00_Document_templates", "01_Deleted lines",
    "SKID"
]
# Two digits immediately followed by the letters "BR".
pattern = r"(\d\d)(BR)"

# Keep only the directories whose own name contains the pattern.
# NOTE: this relies on the `re` module being imported at the top of the file.
fn_list = [
    i for i in dl.dir_list(root_path, obj_type="d", exclude=exclude)
    if re.search(pattern, i.name)
]


def buy(**kwargs):
    """Write each keyword argument's value into row 1 of a new workbook.

    Every keyword value must be indexable with at least two items:
    ``value[0]`` is a column letter (converted with
    ``column_index_from_string``) and ``value[1]`` is the content written to
    that column. The keyword names themselves are not used.

    The workbook exists in memory only: this function neither saves nor
    returns it.
    """
    # TODO: persist the workbook; earlier drafts targeted
    # Path("D:\\00_HERNE\\_tracking") with the file name "_wf_list".
    wb = Workbook()  # workbook
    ws = wb.active  # active sheet of the new workbook
    r = 1  # row that receives every value

    for value in kwargs.values():
        # Pass the plain value: a set literal such as {value[1]} is not a
        # type openpyxl can store in a cell and makes the call fail.
        ws.cell(r, column_index_from_string(value[0]), value[1])  # system
Example #3
0
# Script setup: pick the folder to scan, collect the files to process and
# prepare an empty workbook plus a logger for the processing loop below.
time_now = datetime.now().strftime(
    "%Y-%m-%d_%H%M%S")  # current date/time formatted as Y-m-d_HMS
# Alternative root kept for reference (currently disabled):
# root_path = Path("J:\\32_IZ224_SIEMENS_Herne\\60_Construction\\10_Sx_Input\\30_Sx_Project_Documentation\\10_Mechanical_Engineering_Project\\50_H&S_drawings")     # main path
root_path = Path(
    "J:\\32_IZ224_SIEMENS_Herne\\60_Construction\\20_Sx_Working\\50_Workfiles\\01_Deleted lines"
)
exclude_dir = [
    "00_Archive", "00_archive", "01_Archive", "01_archive",
    "00_Document_templates", "SKID", "02_Red_Corex", "03_Additional_Workfiles",
    "Deleted"
]  # folder names passed to dir_list as exclusions

# NOTE(review): presumably yields the PDF files under root_path whose names
# match "60*BR*" - confirm against dl.dir_list's handling of src_for/ext.
dlist = dl.dir_list(root_path,
                    obj_type="f",
                    src_for="60*BR*",
                    ext="pdf",
                    exclude=exclude_dir)  # files to process

wb_save_path = Path("D:\\00_HERNE\\_tracking\\")  # workbook save path
wb_file_name = "deleted_lines_pdf_parsed_support_list"  # workbook save filename
wb_rev = "00"  # workbook revision
wb = Workbook()  # new in-memory workbook
ws = wb.active  # active sheet of the new workbook
r = 1  # initial row number

log_file = "error_logfile"
# NOTE: this rebinds the name `log`. Before this line it is the object that
# provides get_logger(); afterwards it is the logger that call returned, so
# get_logger() is no longer reachable through `log`.
log = log.get_logger(f"{wb_file_name}_{log_file}")

for file in dlist:
    try:
Example #4
0
from dir_list_r01 import dir_list
from pathlib import Path
from pdf_parser import parse, re_split, parse2, get_pdf_content_lines, parse3, parse4, parse5
import os

# Folder whose PDF files are inspected by this scratch script.
main_dir = Path("D:/_test_ground/_zeran")

# Both lists come from the same query: each PDF's stem with its last three
# characters removed (e.g. a trailing "_00", judging by the sample file below).
ref_ls = [pdf.stem[:-3] for pdf in dir_list(main_dir, extension="pdf")]
con_ls = [pdf.stem[:-3] for pdf in dir_list(main_dir, extension="pdf")]

# latest = max(ref_ls, key=os.path.getctime)

# True when the two lists agree position by position; the comparison stops
# at the end of the shorter list.
test = all(left == right for left, right in zip(ref_ls, con_ls))

# print(test)


# Prints True as soon as ref_ls holds at least one entry, False when empty.
print(any(entry in ref_ls for entry in ref_ls))

# Sample drawing used by the (currently disabled) parser experiments below.
file = Path(r"D:\_test_ground\_zeran\01_LBA_01_LB-HP\2018-07-25\Z214LBA25BR010_00.pdf")
# file = Path(r"D:\_test_ground\_zeran\01_LBA_01_LB-HP\2018-07-25\Z214LBA10BR010_00.pdf")
delimiters = (" ", "\n")

# Disabled parser experiments, kept for reference:
# print(parse(file))

# print(list(filter(None, re_split(delimiters, parse(file),  maxsplit=0))))
# print(re_split(delimiters, parse(file),  maxsplit=0))
# print(parse2(file))

# print(searchInPDF(file))
Example #5
0
# Script setup: pick the folder to scan, collect the files to list and
# prepare an empty workbook plus a logger for the processing loop below.
time_now = datetime.now().strftime("%Y-%m-%d_%H%M%S")  # current date/time as Y-m-d_HMS
root_path = Path("D:\\00_PRJS\\ITER\\08_Ax_Tender_Documentation")
# Alternative roots kept for reference (currently disabled):
# root_path = Path("J:\\32_IZ224_SIEMENS_Herne\\60_Construction\\20_Sx_Working\\50_Workfiles")
# root_path = Path("J:\\32_IZ224_SIEMENS_Herne\\60_Construction\\10_Sx_Input\\30_Sx_Project_Documentation\\10_Mechanical_Engineering_Project\\50_H&S_drawings")
# Folder names passed to dir_list as exclusions.
exclude = ["00_Archive", "00_archive", "01_Archive", "01_archive", "00_Document_templates", "02_Red_Corex", "03_Additional_Workfiles", "SKID", "Deleted", "Bare"]

# ls = dl.dir_list(root_path, ext="pdf", exclude=exclude)     # list of valves
# The string literal below is a disabled code fragment kept for reference;
# as a bare string expression it has no effect at runtime.
"""
# fn_list = list()
pattern = r"(\d\d)(BQ)"
for i in dl.dir_list(root_path, exclude=exclude):
    if re.search(pattern, i.name):
        fn_list.append(i)
"""

# NOTE(review): presumably yields the PDF files under root_path whose names
# match "*_*_*_*_*_V*.*.*" - confirm against dl.dir_list's handling of
# src_for/ext.
fn_list = dl.dir_list(root_path, obj_type="f", src_for="*_*_*_*_*_V*.*.*", ext="pdf", exclude=exclude)
wb_save_path = Path("D:\\00_PRJS\\ITER")  # workbook save path
wb_save_name = "procedure_list"  # workbook save filename
wb_rev = "00"     # workbook revision
wb = Workbook()     # new in-memory workbook
ws = wb.active      # active sheet of the new workbook
r = 1       # initial row number
# patt = r"(\d{9})"

log_file = "error_logfile"
# NOTE: this rebinds the name `log`. Before this line it is the object that
# provides get_logger(); afterwards it is the logger that call returned, so
# get_logger() is no longer reachable through `log`.
log = log.get_logger(f"{wb_save_name}_{log_file}")

for file in fn_list:
    if "redmark" not in str(file):
        try:
            ws.cell(r, 1, f'=HYPERLINK("{file}","Open")')