def rename_files(self, dir_list: list, temp_dir: str) -> None:
    """Rename files method

    Every entry of ``dir_list`` is expected to be named
    ``<date>_<student>_<a-number><ext>``.  The date field is zero-padded to
    four characters and only the part of the a-number from its sixth
    character onwards is kept, so the new name becomes
    ``<date>_<student>_A-<masked a-number><ext>``.

    Entries whose stem does not split into exactly three ``_``-separated
    fields are reported and skipped; they are never renamed.  Rename
    failures are reported and do not stop the remaining files.

    :param dir_list: Temp directory file list used in bulk renaming process
    :param temp_dir: Temp directory path; it is only printed by this method
    """
    count = 0
    print(temp_dir)
    for f in dir_list:
        print(f)
        try:
            source = PurePath(f)
            pic_date, student, a_num = source.stem.split('_')
        except (TypeError, ValueError) as err:
            print(f"[ERROR] {err} -- {f}")
            # Skip this entry: falling through would reuse the fields of the
            # previous file and rename this file onto that file's new name.
            continue

        pic_date = pic_date.strip().zfill(4)
        student = student.strip()
        a_num_masked = a_num.strip()[5:].strip()
        f_name_NEW = f"{pic_date}_{student}_A-{a_num_masked}{source.suffix}"

        try:
            # The target is a bare file name, so it is resolved against the
            # current working directory.
            Path(f).rename(f_name_NEW)
        except OSError as err:
            print(f"[ERROR] {err} -- {f_name_NEW}")
        else:
            count += 1
    print(f"Files renamed: {count}")
def get_start_time(output_path, logger):
    """Derive the start time and duration from existing ``.h5`` output files.

    All ``*.h5`` files below ``output_path`` are collected recursively and
    their names are tried in sorted order.  The part of a name before the
    first ``-`` is split on ``P``: the left side is parsed as an ISO
    date-time (treated as the end time) and the right side, prefixed with
    ``P`` again, as an ISO duration.  The first name that parses wins.

    When no file name can be parsed, the start time defaults to one hour
    before now.

    :param output_path: directory (str or path object) searched for outputs
    :param logger: logger used for debug and error messages
    :return: tuple ``(start_time, duration)``
    """
    duration = timedelta(hours=1)
    h5_outputs = PurePath(output_path).joinpath('**', '*.h5')
    # glob() matches the pattern with a regex, so it needs a plain string.
    file_paths = glob(str(h5_outputs), recursive=True)
    files = sorted(PurePath(path).name for path in file_paths)

    for filename in files:
        try:
            date_time_duration_str = filename.split('-')[0]
            date_time_str, duration_str = date_time_duration_str.split('P')
            end_time = isodate.parse_datetime(date_time_str)
            parsed_duration = isodate.parse_duration(f'P{duration_str}')
            start_time = end_time - parsed_duration
            logger.debug('Calculated start time and parsed duration is %s %s',
                         start_time, parsed_duration)
            return start_time, parsed_duration
        except ValueError as err:
            # Name does not follow the convention; try the next one.
            logger.exception(
                'Exception occurred in get_start_time. Message: %s', err)

    # Determine start_time and duration without existing filename
    end_time = datetime.now()
    start_time = end_time - duration
    logger.debug('Calculated start time and parsed duration is %s %s',
                 start_time, duration)
    return start_time, duration
def extract_dir_name(input_file):
    """
    creates a directory path based on the specified file name

    Only the last extension of the final path component is removed, so
    dots in directory names are left alone and a file without an
    extension is returned unchanged.

    :param input_file: file name
    :return: full path, minus extension
    """
    path = PurePath(input_file)
    if not path.suffix:
        # Nothing to strip (no extension, or no file name at all).
        return str(path)
    return str(path.with_suffix(''))
def get_start_time(output_path, args, config):
    """Work out the start time and duration for the next run.

    Without a ``start_time`` entry in ``args`` the ``*.h5`` files below
    ``output_path`` are inspected, last name in sort order first.  The part
    of the name before the first ``-`` is handed to ``parse_iso``; for the
    first name that parses, the parsed start plus its duration is returned
    together with that duration.

    Otherwise the duration is one hour unless ``config['duration']``
    overrides it, and the start time is, in order of precedence, the value
    from ``args``, ``config['start']``, or now minus the duration.

    :param output_path: path object of the directory holding the outputs
    :param args: mapping that may contain ``start_time``
    :param config: mapping that may contain ``duration`` and ``start``
    :return: tuple ``(start_time, duration)``
    """
    requested_start = args.get('start_time', None)

    if requested_start is None:
        # Glob allows the use of the * wildcard
        pattern = str(output_path.joinpath('**', '*.h5'))
        names = sorted(
            PurePath(hit).name for hit in glob(pattern, recursive=True))
        for name in reversed(names):
            try:
                stamp = name.split('-')[0]
                start_time, parsed_duration = parse_iso(stamp)
                end_time = start_time + parsed_duration
                logger.debug('Calculated end time: %s', end_time)
                logger.debug('Parsed duration: %s', parsed_duration)
                return end_time, parsed_duration
            except ValueError:
                logger.debug('Ignoring %s for calculating start time.', name)

    # Default duration, optionally replaced by the configured one
    duration = timedelta(hours=1)
    if 'duration' in config:
        duration = isodate.parse_duration(f"P{config['duration']}")

    end_time = datetime.now()
    if requested_start is not None:
        start_time = requested_start
    else:
        # No usable file name: count back from now, unless the config
        # pins an explicit start
        start_time = end_time - duration
        if 'start' in config:
            start_time = isodate.parse_datetime(config['start'])

    logger.debug('End time and duration are %s %s', end_time, duration)
    return start_time, duration
class Msgtopdf:
    """Convert an Outlook ``.msg`` file into a PDF via an HTML intermediate.

    The message is opened through the Outlook MAPI namespace.  Output goes
    into a folder next to the message, named after the message file.
    """

    def __init__(self, msgfile):
        """Open ``msgfile`` in Outlook and derive the output locations."""
        if check_paths_exist(required_paths) is False:
            sys.exit(1)
        application = win32com.client.Dispatch("Outlook.Application")
        outlook = application.GetNamespace("MAPI")
        self.msgfile = PurePath(msgfile)
        self.directory = self.msgfile.parent
        self.file = self.msgfile.name
        self.file_name = self.file.split(".msg")[0]
        self.save_path = self.__define_save_path()
        self.msg = outlook.OpenSharedItem(self.msgfile)

    def raw_email_body(self):
        """Return the message body and record its format in ``email_format``."""
        body_format = self.msg.BodyFormat
        if body_format == 2:
            body, kind = self.msg.HTMLBody, "html"
        elif body_format == 3:
            body, kind = self.msg.RTFBody, "html"
        else:
            body, kind = self.msg.Body, "txt"
        self.email_format = kind
        self.raw_body = body
        return self.raw_body

    def email2pdf(self):
        """Write the message as HTML, render it with wkhtmltopdf, clean up."""
        Path(self.save_path).mkdir()
        header = self.__add_header_information()
        document = self.replace_CID(header + self.raw_email_body())
        self.html_body_file = PurePath(self.save_path,
                                       self.file_name + ".html")
        self.extract_email_attachments()
        # convert_html_to_pdf(clean_email_body, self.html_body_file)
        Path(self.html_body_file).write_text(document, encoding="utf-8")
        # save pdf copy using wkhtmltopdf
        pdf_target = PurePath(self.save_path, self.file_name + ".pdf")
        command = [
            "wkhtmltopdf",
            "--enable-local-file-access",
            "--log-level",
            "warn",
            "--encoding",
            "utf-8",
            "--footer-font-size",
            "6",
            "--footer-line",
            "--footer-center",
            "[page] / [topage]",
            str(self.html_body_file),
            str(pdf_target),
        ]
        try:
            subprocess.run(command)
        except Exception as e:
            logging.critical("Could not call wkhtmltopdf")
            logging.debug(e)
        self.__delete_redundant_files()

    def extract_email_attachments(self):
        """Save every attachment of the message into the save folder."""
        attachments = self.msg.Attachments
        # Attachment items are addressed starting at 1
        for index in range(1, attachments.Count + 1):
            attachment = attachments.Item(index)
            attachment.SaveAsFile(
                PurePath(self.save_path, attachment.Filename))

    def __define_save_path(self):
        """Return the output folder: message directory / cleaned file name."""
        folder = self.clean_path(self.file.split(".msg")[0])
        # TODO check if save_path already exists and if so add increment
        return PurePath(self.directory, folder)

    def __add_header_information(self):
        """Build the HTML header block (sender, date, recipients, subject)."""
        html_str = (
            ' <head> <meta charset="UTF-8"> <base href="{base_href}">'
            ' <p style="font-family: Arial;font-size: 11.0pt"> </head>'
            ' <strong>From:</strong> {sender}</br>'
            ' <strong>Sent:</strong> {sent}</br>'
            ' <strong>To:</strong> {to}</br>'
            ' <strong>Cc:</strong> {cc}</br>'
            ' <strong>Subject:</strong> {subject}</p> <hr> '
        )
        return html_str.format(
            base_href="file:///" + str(self.save_path) + "\\",
            sender=self.msg.SenderName,
            sent=self.msg.SentOn,
            to=self.msg.To,
            cc=self.msg.CC,
            subject=self.msg.Subject,
            attachments=self.msg.Attachments,
        )

    def replace_CID(self, body):
        """Replace ``cid:`` references in ``body`` by the bare image names.

        The names found are collected in ``self.image_files``.
        """
        self.image_files = []
        # search for cid:(capture_group)@* upto "
        return re.sub(r"cid:([^\"@]*)[^\"]*",
                      self.__return_image_reference, body)

    def __return_image_reference(self, match):
        """Record the captured image name once and return it."""
        name = str(match.group(1))
        if name not in self.image_files:
            self.image_files.append(name)
        return name

    def __delete_redundant_files(self):
        """Remove the intermediate HTML file and any inline image files."""
        Path(self.html_body_file).unlink()
        for name in self.image_files:
            candidate = Path(self.save_path, name)
            if candidate.exists():
                candidate.unlink()

    def clean_path(self, path):
        """Strip characters unsuitable for a folder name from ``path``."""
        without_specials = re.sub(r'[\\/\:*"<>\|\.%\$\^&£]', "", path)
        return re.sub(r"[ ]{2,}", "", without_specials).strip()
# %% min_disp = 70 num_disp = 10 * 16 block_size = 31 stereo = cv2.StereoBM_create(numDisparities=num_disp, blockSize=block_size) stereo.setMinDisparity(min_disp) stereo.setDisp12MaxDiff(200) stereo.setUniquenessRatio(10) stereo.setSpeckleRange(3) stereo.setSpeckleWindowSize(3) images_l = sorted(left.glob('*_Left.png')) for filename_left in images_l: filename_stem = PurePath(filename_left).stem filename_parts = filename_stem.split('_') file_number = filename_parts[0] + '_' + filename_parts[1] filename_right = right / (file_number + '_Right.png') img_l = cv2.imread(str(filename_left)) img_r = cv2.imread(str(filename_right)) dst_l, dst_r = calibrateImages(img_l, img_r) #dst_l, dst_r = biFilter(dst_l, dst_r, 17, 35) #dst_l, dst_r = gaussFilter(dst_l, dst_r, 17) gray_l = cv2.cvtColor(dst_l, cv2.COLOR_BGR2GRAY) gray_r = cv2.cvtColor(dst_r, cv2.COLOR_BGR2GRAY) disp = stereo.compute(gray_l, gray_r).astype('float') #disp1 = stereo.compute(dst_l[:,:,0], dst_r[:,:,0]).astype('float')
def __from_h5_dset(self, h5_dset, data_sel, datapoint):
    """
    initialize S5Pmsm object from h5py dataset

    Sets ``name``, ``value`` (and ``error`` for datapoints), ``coords``,
    ``fillvalue`` and, when the dataset has the attributes, ``units`` and
    ``long_name``.

    :param h5_dset: h5py dataset to read from
    :param data_sel: ``None`` to read everything, otherwise a selection
        that is applied as an index to the dataset
    :param datapoint: when true the dataset is read through its fields
        'value' and 'error' instead of directly
    :raises ValueError: when the dataset has more than three dimensions
    """
    self.name = PurePath(h5_dset.name).name

    # copy dataset values (and error) to object
    if data_sel is None:
        if datapoint:
            self.value = h5_dset['value']
            self.error = h5_dset['error']
        else:
            self.value = h5_dset[...]
    else:
        # we need to keep all dimensions to get the dimensions
        # of the output data right
        if datapoint:
            self.value = h5_dset['value'][data_sel]
            self.error = h5_dset['error'][data_sel]
            if isinstance(data_sel, tuple):
                # an integer index drops its axis; put a length-1 axis back
                for ii, elmnt in enumerate(data_sel):
                    if isinstance(elmnt, (int, np.int64)):
                        self.value = np.expand_dims(self.value, axis=ii)
                        self.error = np.expand_dims(self.error, axis=ii)
        else:
            self.value = h5_dset[data_sel]
            if isinstance(data_sel, tuple):
                # an integer index drops its axis; put a length-1 axis back
                for ii, elmnt in enumerate(data_sel):
                    if isinstance(elmnt, (int, np.int64)):
                        self.value = np.expand_dims(self.value, axis=ii)

    # set default dimension names
    if h5_dset.ndim == 1:
        keys_default = ['column']
    elif h5_dset.ndim == 2:
        keys_default = ['row', 'column']
    elif h5_dset.ndim == 3:
        keys_default = ['time', 'row', 'column']
    else:
        raise ValueError('not implemented for ndim > 3')

    # copy all dimensions with size longer than 1
    keys = []
    dims = []
    for ii in range(h5_dset.ndim):
        if self.value.shape[ii] == 1:
            continue

        if len(h5_dset.dims[ii]) != 1:  # bug in some KNMI HDF5 files
            # no single dimension scale attached: use the default name
            # and a plain index range
            keys.append(keys_default[ii])
            dims.append(np.arange(self.value.shape[ii]))
        elif self.value.shape[ii] == h5_dset.shape[ii]:
            # this axis was read completely
            buff = PurePath(h5_dset.dims[ii][0].name).name
            if len(buff.split()) > 1:
                # keep only the first word of the scale name
                buff = buff.split()[0]
            keys.append(buff)
            if h5_dset.dims[ii][0][:].size == h5_dset.shape[ii]:
                buff = h5_dset.dims[ii][0][:]
                if np.all(buff == 0):
                    # scale holds only zeros: replace by an index range
                    buff = np.arange(buff.size)
            else:  # bug in some KNMI HDF5 files
                buff = np.arange(h5_dset.shape[ii])
            dims.append(buff)
        else:
            # this axis was read partially: build the full coordinate as
            # above, then apply the selection to it
            buff = PurePath(h5_dset.dims[ii][0].name).name
            if len(buff.split()) > 1:
                buff = buff.split()[0]
            keys.append(buff)
            if h5_dset.dims[ii][0][:].size == h5_dset.shape[ii]:
                buff = h5_dset.dims[ii][0][:]
                if np.all(buff == 0):
                    buff = np.arange(buff.size)
            else:  # bug in some KNMI HDF5 files
                buff = np.arange(h5_dset.shape[ii])

            if isinstance(data_sel, slice):
                dims.append(buff[data_sel])
            elif len(data_sel) == h5_dset.ndim:
                dims.append(buff[data_sel[ii]])
            elif not isinstance(data_sel, tuple):
                dims.append(buff[data_sel])
            # NOTE(review): when ii == len(data_sel) this test is false and
            # the final branch indexes data_sel[ii] out of range --
            # possibly '>=' was intended; confirm before changing.
            elif ii > len(data_sel):
                dims.append(buff[data_sel[-1]])
            else:
                dims.append(buff[data_sel[ii]])

    # add dimensions as a namedtuple
    coords_namedtuple = namedtuple('Coords', keys)
    self.coords = coords_namedtuple._make(dims)

    # remove all dimensions with size equal 1 from value (and error)
    self.value = np.squeeze(self.value)
    if datapoint:
        self.error = np.squeeze(self.error)

    # copy FillValue (same for value/error in a datapoint)
    if datapoint:
        self.fillvalue = h5_dset.fillvalue[0]
    else:
        self.fillvalue = h5_dset.fillvalue

    # copy its units
    if 'units' in h5_dset.attrs:
        if isinstance(h5_dset.attrs['units'], np.ndarray):
            if h5_dset.attrs['units'].size == 1:
                # single-element array: unwrap to a scalar string
                self.units = h5_dset.attrs['units'][0]
                if isinstance(self.units, bytes):
                    self.units = self.units.decode('ascii')
            else:
                # several units: keep the array, converted from bytes
                self.units = h5_dset.attrs['units']
                if isinstance(self.units[0], bytes):
                    self.units = self.units.astype(str)
        else:
            self.units = h5_dset.attrs['units']
            if isinstance(self.units, bytes):
                self.units = self.units.decode('ascii')

    # copy its long_name
    if 'long_name' in h5_dset.attrs:
        if isinstance(h5_dset.attrs['long_name'], bytes):
            self.long_name = h5_dset.attrs['long_name'].decode('ascii')
        else:
            self.long_name = h5_dset.attrs['long_name']
import sys
import os
from pathlib import PurePath
from pathlib import PureWindowsPath


def build_dated_filename(filename):
    """Return ``filename`` with a ``YYYY-MM-DD_`` prefix on its last component.

    The file name is split on ``_`` and the first part containing ``dat``
    supplies the date: characters 3-6 are the year, 7-8 the month and 9-10
    the day (i.e. a part shaped like ``datYYYYMMDD``).

    :param filename: path of the file to rename
    :return: the new path as a string, or ``None`` when no part of the
        file name contains ``dat``
    """
    path = PurePath(filename)
    data = path.name
    strObj = next((part for part in data.split("_") if 'dat' in part), None)
    if strObj is None:
        return None
    year = strObj[3:7]
    month = strObj[7:9]
    day = strObj[9:11]
    prefix = year + "-" + month + "-" + day + "_"
    # with_name keeps the directory and uses the platform's own separator
    return str(path.with_name(prefix + data))


def main(argv=None):
    """Rename every file given on the command line (drag and drop).

    Files without a ``dat`` part in their name, and files that cannot be
    renamed, are reported and skipped so the remaining files are still
    processed.

    :param argv: argument list including the program name; defaults to
        ``sys.argv``
    """
    argv = sys.argv if argv is None else argv
    if len(argv) == 1:
        print("No files imported - Please drag and drop your files")
        return

    print('Number of given files:', len(argv) - 1, 'Files')
    for filename in argv[1:]:
        newfilename = build_dated_filename(filename)
        if newfilename is None:
            print("Skipped - no 'dat' part in file name:")
            print(filename)
            print("---------")
            continue
        print("Old Filename:")
        print(filename)
        print("New Filename:")
        print(newfilename)
        print("---------")
        try:
            os.rename(filename, newfilename)
        except OSError as err:
            print("Could not rename file:", err)


if __name__ == "__main__":
    main()
def transform(self):
    """Run OCR on the PDF input and write ``<name>_new.pdf`` beside each file.

    ``self.input_path`` may be an absolute or relative path to a ``.pdf``
    file or to a directory containing ``.pdf`` files.  A relative path that
    is not found below the current directory is searched for on the whole
    filesystem.  Every PDF is rendered to a grayscale TIFF with
    Ghostscript, the TIFF is passed to Tesseract with the ``pdf`` output
    option, and the TIFF is removed afterwards.

    On Windows the executables are located through ``self.which_win`` and,
    failing that, a filesystem walk checked with ``self.is_exe``.  On POSIX
    ``which`` is used.

    :return: the string ``"1"``
    :raises FileNotFoundError: input not found, or (POSIX) a tool is missing
    :raises ValueError: input is neither a directory nor a ``.pdf`` file
    :raises PermissionError: an OS call failed with EPERM or EACCES
    :raises SystemExit: (Windows) a tool is missing, or the OS is unsupported
    """
    try:
        inpath = self.input_path
        pdf_files = []
        file_or_dir = os.path.splitext(inpath)[1]

        # --- 1. resolve the input into a list of pdf files ---------------
        if os.path.isabs(inpath):
            if file_or_dir == '' and os.path.isdir(
                    os.path.realpath(inpath)):
                pdf_files = glob.glob(os.path.realpath(inpath) + "/*.pdf")
            elif file_or_dir == '.pdf' and os.path.isfile(
                    os.path.realpath(inpath)):
                pdf_files = [os.path.realpath(inpath)]
        else:
            if file_or_dir == '' and os.path.isdir(
                    os.path.join(os.getcwd(), inpath)):
                pdf_files = glob.glob(
                    os.path.join(os.getcwd(), inpath) + "/*.pdf")
            elif file_or_dir == '.pdf' and os.path.isfile(
                    os.path.join(os.getcwd(), inpath)):
                pdf_files = [os.path.join(os.getcwd(), inpath)]
            elif file_or_dir == '' and os.path.isdir(
                    os.path.realpath(inpath)):
                pdf_files = glob.glob(os.path.realpath(inpath) + "/*.pdf")
            elif file_or_dir == '.pdf' and os.path.isfile(
                    os.path.realpath(inpath)):
                # the resolved path already is the file; joining it with
                # inpath again would point below the file itself
                pdf_files = [os.path.realpath(inpath)]
            elif file_or_dir in ('', '.pdf'):
                # Last resort: search the whole filesystem.  Skipped for
                # other extensions, which can never match.
                if os.name == 'nt':
                    root, sep = os.path.realpath('\\'), '\\'
                elif os.name == 'posix':
                    root, sep = '/', '/'
                else:
                    root, sep = None, None
                if root is not None:
                    parts = inpath.split(sep)
                    for r, d, f in os.walk(root):
                        if file_or_dir == '.pdf':
                            for file in f:
                                if len(parts) == 1:
                                    matches = file == inpath
                                else:
                                    # the directory part of inpath must
                                    # occur in the walked directory
                                    matches = (
                                        file == parts[-1]
                                        and sep.join(parts[:-1]) in r)
                                if matches and os.path.isfile(
                                        os.path.join(r, file)):
                                    pdf_files.append(os.path.join(r, file))
                        elif inpath == r[-len(inpath):]:
                            pdf_files += glob.glob(r + "/*.pdf")

        if not pdf_files:
            if file_or_dir == '':
                raise FileNotFoundError(
                    "This is either a non-existent directory or no pdf file exists in this directory"
                )
            elif file_or_dir == '.pdf':
                raise FileNotFoundError("This is an non-existent pdf file")
            else:
                raise ValueError("Only pdf files are acceptable as input")

        # --- 2. locate the tools and convert every file ------------------
        if os.name == 'nt':
            dirg = ''
            if self.which_win('gswin64c.exe') is not None:
                dirg = str(PurePath(self.which_win('gswin64c.exe')))
            elif self.which_win('gswin32c.exe') is not None:
                dirg = str(PurePath(self.which_win('gswin32c.exe')))
            else:
                for r, d, f in os.walk(os.path.realpath('\\')):
                    for files in f:
                        if files == 'gswin64c.exe' or files == 'gswin32c.exe':
                            if self.is_exe(os.path.join(r, files)):
                                dirg = str(PurePath(os.path.join(r, files)))
            if dirg is None or dirg == '':
                sys.exit(
                    "Please install Ghostscript, if you want to complete this task"
                )
            dirt = ''
            if self.which_win('tesseract.exe') is not None:
                dirt = str(PurePath(self.which_win('tesseract.exe')))
            else:
                for r, d, f in os.walk(os.path.realpath('\\')):
                    for files in f:
                        if files == "tesseract.exe":
                            if self.is_exe(os.path.join(r, files)):
                                dirt = str(PurePath(os.path.join(r, files)))
            if dirt is None or dirt == '':
                sys.exit(
                    "Please install Tesseract-OCR, if you want to complete this task"
                )
            for inp in pdf_files:
                # keep the console window of the child processes hidden
                si = subprocess.STARTUPINFO()
                si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
                directory = str(PurePath(inp).parent)
                inp = PurePath(inp).name
                try:
                    call = PurePath(dirg).name.split('.')[0]
                    outp = inp.split('.')[0] + '.tiff'
                    # An argument list (instead of splitting a command
                    # string) keeps file names with spaces intact.
                    args = [
                        call, '-q', '-r300', '-dNOPAUSE',
                        '-sDEVICE=tiffgray', '-dBATCH', '-dINTERPOLATE',
                        '-o', outp, '-f', inp, '-c', 'quit'
                    ]
                    proc = subprocess.Popen(args,
                                            executable=dirg,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.STDOUT,
                                            cwd=directory,
                                            startupinfo=si)
                    proc.wait()
                except Exception:
                    print(
                        "Error while running Ghostscript subprocess. Traceback:"
                    )
                    print("Traceback:\n%s" % traceback.format_exc())
                # NOTE(review): stderr is redirected into stdout above, so
                # `stderr` is None here and the message below cannot fire.
                stdout, stderr = proc.communicate()
                if stderr:
                    print("Ghostscript stderr:\n'%s'" % stderr)
                try:
                    read_pdf = outp.split('.')[0] + '_new'
                    args1 = ['tesseract', outp, read_pdf, 'pdf']
                    proc1 = subprocess.Popen(args1,
                                             executable=dirt,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.STDOUT,
                                             cwd=directory,
                                             startupinfo=si)
                    proc1.wait()
                except Exception:
                    print(
                        "Error while running Tesseract subprocess. Traceback:"
                    )
                    print("Traceback:\n%s" % traceback.format_exc())
                stdout1, stderr1 = proc1.communicate()
                if stderr1:
                    print("Tesseract stderr:\n'%s'" % stderr1)
                # the intermediate tiff is no longer needed
                os.remove(directory + '\\' + outp)
        elif os.name == 'posix':
            check1 = subprocess.Popen(['which', 'tesseract'],
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT)
            check1.wait()
            c1stdout, c1stderr = check1.communicate()
            if c1stdout == b'':
                raise FileNotFoundError(
                    "Please install Tesseract-OCR, if you want to complete this task"
                )
            check2 = subprocess.Popen(['which', 'gs'],
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT)
            check2.wait()
            c2stdout, c2stderr = check2.communicate()
            if c2stdout == b'':
                raise FileNotFoundError(
                    "Please install Ghostscript, if you want to complete this task"
                )
            for inp in pdf_files:
                directory = str(PurePath(inp).parent)
                inp = PurePath(inp).name
                try:
                    outp = inp.split('.')[0] + '.tiff'
                    # An argument list (instead of splitting a command
                    # string) keeps file names with spaces intact.
                    args = [
                        'gs', '-q', '-r300', '-dNOPAUSE',
                        '-sDEVICE=tiffgray', '-dBATCH', '-dINTERPOLATE',
                        '-o', outp, '-f', inp, '-c', 'quit'
                    ]
                    proc = subprocess.Popen(args,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.STDOUT,
                                            cwd=directory)
                    proc.wait()
                except Exception:
                    print(
                        "Error while running Ghostscript subprocess. Traceback:"
                    )
                    print("Traceback:\n%s" % traceback.format_exc())
                # NOTE(review): stderr is redirected into stdout above, so
                # `stderr` is None here and the message below cannot fire.
                stdout, stderr = proc.communicate()
                if stderr:
                    print("Ghostscript stderr:\n'%s'" % stderr)
                try:
                    read_pdf = outp.split('.')[0] + '_new'
                    args1 = ['tesseract', outp, read_pdf, 'pdf']
                    proc1 = subprocess.Popen(args1,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.STDOUT,
                                             cwd=directory)
                    proc1.wait()
                except Exception:
                    print(
                        "Error while running Tesseract subprocess. Traceback:"
                    )
                    print("Traceback:\n%s" % traceback.format_exc())
                stdout1, stderr1 = proc1.communicate()
                if stderr1:
                    print("Tesseract stderr:\n'%s'" % stderr1)
                # the intermediate tiff is no longer needed
                os.remove(directory + '/' + outp)
        else:
            sys.exit('Only for Windows or Linux')
    except OSError as error:
        # Map low-level errno values onto the specific exception types;
        # anything else (including the errors raised above, which carry no
        # errno) is passed on unchanged.
        if error.errno == errno.ENOENT:
            raise FileNotFoundError()
        elif error.errno in [errno.EPERM, errno.EACCES]:
            raise PermissionError()
        else:
            raise
    return "1"