Ejemplo n.º 1
0
    def rename_files(self, dir_list: str, temp_dir: str) -> None:
        """Bulk-rename files named ``<date>_<student>_<A-number><ext>``.

        Each name is split on ``_`` into exactly three fields.  The date is
        left-padded with zeros to four characters, and only the characters of
        the A-number from index 5 onward are kept, giving the new name
        ``<date>_<student>_A-<masked><ext>``.

        A file whose name cannot be parsed is reported and skipped, so it is
        never renamed using values left over from the previous file.

        :param dir_list: Iterable of file paths to rename (the annotation is
            kept as ``str`` for interface compatibility; the value is iterated
            item by item).
        :param temp_dir: Temp directory the list was built from; only printed.
        :return: None.  Progress, errors and the final count go to stdout.
        """
        count = 0
        print(temp_dir)
        for f in dir_list:
            print(f)
            try:
                source = PurePath(f)
                f_ext = source.suffix
                pic_date, student, a_num = source.stem.split('_')
            except (TypeError, ValueError) as err:
                # Wrong number of '_' separated fields (or not a path at all):
                # leave this file alone and move on to the next one.
                print(f"[ERROR] {err} -- {f}")
                continue

            pic_date = pic_date.strip().zfill(4)
            student = student.strip()
            a_num_masked = a_num.strip()[5:].strip()

            # Built outside the try so the error message below can always
            # refer to it.
            f_name_NEW = f"{pic_date}_{student}_A-{a_num_masked}{f_ext}"
            try:
                # The target is a bare file name, so it resolves against the
                # current working directory exactly as before.
                Path(f).rename(f_name_NEW)
            except OSError as err:
                print(f"[ERROR] {err} -- {f_name_NEW}")
            else:
                count += 1

        print(f"Files renamed: {count}")
def get_start_time(output_path, logger):
    """Derive a start time and duration from existing ``*.h5`` output names.

    File names are expected to begin with ``<ISO datetime>P<ISO duration>``
    followed by ``-``.  Names are tried in ascending sorted order; the first
    one that parses decides the result.  Names that do not parse are logged
    and skipped.

    :param output_path: Directory searched recursively for ``*.h5`` files
        (a ``str`` or any path object).
    :param logger: Logger used for debug and exception messages.
    :return: ``(start_time, duration)``.  From a parsed name this is the
        parsed datetime minus the parsed duration, together with that
        duration.  Otherwise it is "now minus one hour" and one hour.
    """
    duration = timedelta(hours=1)
    h5_outputs = PurePath(output_path).joinpath('**', '*.h5')
    # glob() needs a string pattern; ** with recursive=True descends into
    # sub-directories.
    file_paths = glob(str(h5_outputs), recursive=True)
    files = sorted(PurePath(path).name for path in file_paths)

    # NOTE(review): ascending order means the earliest-sorting name is tried
    # first, although the original variable was called "most recent" --
    # confirm which end of the list is intended.
    for filename in files:
        try:
            date_time_duration_str = filename.split('-')[0]
            date_time_str, duration_str = date_time_duration_str.split('P')
            end_time = isodate.parse_datetime(date_time_str)
            parsed_duration = isodate.parse_duration(f'P{duration_str}')
        except ValueError as err:
            logger.exception(
                'Exception occurred in get_start_time. Message: %s', err)
            continue

        start_time = end_time - parsed_duration
        logger.debug('Calculated start time and parsed duration is %s %s',
                     start_time, parsed_duration)
        return start_time, parsed_duration

    # Determine start_time and duration without existing filename
    end_time = datetime.now()
    start_time = end_time - duration
    logger.debug('Calculated start time and parsed duration is %s %s',
                 start_time, duration)
    return start_time, duration
Ejemplo n.º 3
0
def extract_dir_name(input_file):
    """
    Creates a directory path based on the specified file name.

    Only the final suffix of the last path component is removed, so dots in
    parent directories are left alone and a name without any extension is
    returned unchanged (instead of collapsing to an empty string).

    :param input_file: file name (``str`` or path object)
    :return: full path as a string, minus the last extension
    """
    path = PurePath(input_file)
    if not path.suffix:
        # Nothing to strip; also avoids with_suffix() on an empty name.
        return str(path)
    return str(path.with_suffix(''))
Ejemplo n.º 4
0
def get_start_time(output_path, args, config):
    """Work out the time window for the next run.

    Without a ``start_time`` entry in *args*, existing ``*.h5`` files under
    *output_path* are inspected, newest-sorting name first.  The first name
    whose leading ``-`` separated field is accepted by ``parse_iso`` yields
    ``(parsed start + parsed duration, parsed duration)``, which is returned
    immediately.  Names rejected with ``ValueError`` are skipped.

    Otherwise the duration is one hour unless ``config['duration']`` is set,
    and the start is, in order of precedence: the ``start_time`` from
    *args*, ``config['start']``, or "now minus duration".

    :param output_path: path object providing ``joinpath``
    :param args: mapping of command-line values (``start_time`` is optional)
    :param config: mapping that may hold ``duration`` and ``start``
    :return: tuple of (time, duration)
    """
    cli_start_time = args.get('start_time', None)

    if cli_start_time is None:
        pattern = str(output_path.joinpath('**', '*.h5'))
        # ** together with recursive=True also matches nested directories
        names = sorted(PurePath(hit).name
                       for hit in glob(pattern, recursive=True))

        while names:
            try:
                newest = PurePath(names[-1]).name
                stamp = newest.split('-')[0]
                start_time, parsed_duration = parse_iso(stamp)
                end_time = start_time + parsed_duration
                logger.debug('Calculated end time: %s', end_time)
                logger.debug('Parsed duration: %s', parsed_duration)
                return end_time, parsed_duration
            except ValueError:
                logger.debug('Ignoring %s for calculating start time.',
                             names[-1])
                names.pop()

    # Default window length, optionally overridden from the config
    if 'duration' in config.keys():
        raw_duration = config['duration']
        duration = isodate.parse_duration(f'P{raw_duration}')
    else:
        duration = timedelta(hours=1)

    end_time = datetime.now()

    if cli_start_time is not None:
        start_time = cli_start_time
    else:
        # No usable file name: count back from now, unless the config
        # pins an explicit start
        start_time = end_time - duration
        if 'start' in config.keys():
            start_time = isodate.parse_datetime(config['start'])

    logger.debug('End time and duration are %s %s', end_time, duration)

    return start_time, duration
Ejemplo n.º 5
0
class Msgtopdf:
    """Turn an Outlook ``.msg`` file into a PDF stored next to the message.

    The output goes into a folder beside the message file, named after the
    message with unsafe characters removed.  Attachments are saved into the
    same folder.  The e-mail body is written to a temporary HTML file and
    converted with the external ``wkhtmltopdf`` program.
    """

    def __init__(self, msgfile):
        """Open *msgfile* through Outlook and derive the output locations.

        Exits the process with status 1 when ``check_paths_exist`` reports
        that the required paths are missing.
        """
        if check_paths_exist(required_paths) is False:
            sys.exit(1)
        application = win32com.client.Dispatch("Outlook.Application")
        outlook = application.GetNamespace("MAPI")
        self.msgfile = PurePath(msgfile)
        self.directory = self.msgfile.parent
        self.file = self.msgfile.name
        self.file_name = self.file.split(".msg")[0]
        self.save_path = self.__define_save_path()
        self.msg = outlook.OpenSharedItem(self.msgfile)

    def raw_email_body(self):
        """Return the message body and record its format.

        Sets ``self.email_format`` ("html" or "txt") and ``self.raw_body``.
        """
        # NOTE(review): 2 and 3 presumably are Outlook's HTML and rich-text
        # body formats -- confirm against the Outlook object model.
        body_format = self.msg.BodyFormat
        if body_format == 2:
            content, kind = self.msg.HTMLBody, "html"
        elif body_format == 3:
            content, kind = self.msg.RTFBody, "html"
        else:
            content, kind = self.msg.Body, "txt"
        self.email_format = kind
        self.raw_body = content
        return self.raw_body

    def email2pdf(self):
        """Create the output folder, save attachments and render the PDF.

        The folder is created without ``exist_ok``, so an existing folder
        raises ``FileExistsError``.
        """
        Path(self.save_path).mkdir()
        full_email_body = self.__add_header_information() + self.raw_email_body()
        clean_email_body = self.replace_CID(full_email_body)
        self.html_body_file = PurePath(self.save_path,
                                       self.file_name + ".html")
        self.extract_email_attachments()
        with open(self.html_body_file, "w", encoding="utf-8") as handle:
            handle.write(clean_email_body)

        # Render the temporary HTML file to PDF using wkhtmltopdf
        pdf_target = str(PurePath(self.save_path, self.file_name + ".pdf"))
        command = [
            "wkhtmltopdf",
            "--enable-local-file-access",
            "--log-level",
            "warn",
            "--encoding",
            "utf-8",
            "--footer-font-size",
            "6",
            "--footer-line",
            "--footer-center",
            "[page] / [topage]",
            str(self.html_body_file),
            pdf_target,
        ]
        try:
            subprocess.run(command)
        except Exception as e:
            logging.critical("Could not call wkhtmltopdf")
            logging.debug(e)
        self.__delete_redundant_files()

    def extract_email_attachments(self):
        """Save every attachment of the message into ``self.save_path``."""
        attachments = self.msg.Attachments
        # The collection is addressed with 1-based indexes
        for position in range(1, attachments.Count + 1):
            attachment = attachments.Item(position)
            attachment.SaveAsFile(
                PurePath(self.save_path, attachment.Filename))

    def __define_save_path(self):
        """Build the output folder path beside the message file."""
        folder_name = self.clean_path(self.file.split(".msg")[0])
        # TODO check if save_path already exists and if so add increment
        return PurePath(self.directory, folder_name)

    def __add_header_information(self):
        """Return the HTML header block with sender, date and subject."""
        template = """
        <head>
        <meta charset="UTF-8">
        <base href="{base_href}">
        <p style="font-family: Arial;font-size: 11.0pt">
        </head>
        <strong>From:</strong>               {sender}</br>
        <strong>Sent:</strong>               {sent}</br>
        <strong>To:</strong>                 {to}</br>
        <strong>Cc:</strong>                 {cc}</br>
        <strong>Subject:</strong>            {subject}</p>
        <hr>
        """
        fields = dict(
            base_href="file:///" + str(self.save_path) + "\\",
            sender=self.msg.SenderName,
            sent=self.msg.SentOn,
            to=self.msg.To,
            cc=self.msg.CC,
            subject=self.msg.Subject,
            attachments=self.msg.Attachments,
        )
        return template.format(**fields)

    def replace_CID(self, body):
        """Replace ``cid:name@...`` references in *body* with ``name``.

        Every distinct name is collected in ``self.image_files``.
        """
        self.image_files = []
        # match cid:(capture_group)@* up to the closing double quote
        return re.sub(r"cid:([^\"@]*)[^\"]*",
                      self.__return_image_reference, body)

    def __return_image_reference(self, match):
        """Regex callback: remember the captured name and return it."""
        name = str(match.group(1))
        if name not in self.image_files:
            self.image_files.append(name)
        return name

    def __delete_redundant_files(self):
        """Remove the temporary HTML file and the inline image files."""
        Path(self.html_body_file).unlink()
        for image_name in self.image_files:
            image_path = Path(self.save_path, image_name)
            if image_path.exists():
                image_path.unlink()

    def clean_path(self, path):
        """Strip characters unsuitable for a folder name from *path*.

        Listed punctuation is removed, runs of two or more spaces are
        removed entirely, and surrounding whitespace is trimmed.
        """
        without_symbols = re.sub(r'[\\/\:*"<>\|\.%\$\^&£]', "", path)
        return re.sub(r"[ ]{2,}", "", without_symbols).strip()
Ejemplo n.º 6
0
# %%
# Notebook cell: configure an OpenCV block-matching stereo object and compute
# a disparity map for every left/right image pair.
min_disp = 70
num_disp = 10 * 16  # 160, written as an explicit multiple of 16
block_size = 31
stereo = cv2.StereoBM_create(numDisparities=num_disp, blockSize=block_size)
stereo.setMinDisparity(min_disp)
stereo.setDisp12MaxDiff(200)
stereo.setUniquenessRatio(10)
stereo.setSpeckleRange(3)
stereo.setSpeckleWindowSize(3)

# `left` and `right` are defined outside this cell; presumably pathlib.Path
# directories, since `.glob()` and the `/` operator are used on them -- confirm.
images_l = sorted(left.glob('*_Left.png'))
for filename_left in images_l:
    # The first two '_' separated fields of the left file name identify the
    # pair; the matching right image is "<field0>_<field1>_Right.png".
    filename_stem = PurePath(filename_left).stem
    filename_parts = filename_stem.split('_')
    file_number = filename_parts[0] + '_' + filename_parts[1]
    filename_right = right / (file_number + '_Right.png')
    img_l = cv2.imread(str(filename_left))
    img_r = cv2.imread(str(filename_right))

    # calibrateImages is defined elsewhere; it returns one processed image
    # per input image.
    dst_l, dst_r = calibrateImages(img_l, img_r)

    #dst_l, dst_r = biFilter(dst_l, dst_r, 17, 35)
    #dst_l, dst_r = gaussFilter(dst_l, dst_r, 17)

    # The matcher is fed single-channel images; its result is converted to
    # float.
    gray_l = cv2.cvtColor(dst_l, cv2.COLOR_BGR2GRAY)
    gray_r = cv2.cvtColor(dst_r, cv2.COLOR_BGR2GRAY)
    disp = stereo.compute(gray_l, gray_r).astype('float')

    #disp1 = stereo.compute(dst_l[:,:,0], dst_r[:,:,0]).astype('float')
Ejemplo n.º 7
0
    def __from_h5_dset(self, h5_dset, data_sel, datapoint):
        """
        Initialize S5Pmsm object from an h5py dataset.

        Sets ``name``, ``value`` (and ``error`` for datapoints), ``coords``
        and ``fillvalue``, plus ``units`` and ``long_name`` when the dataset
        carries those attributes.

        Parameters
        ----------
        h5_dset :
            dataset to read; its ``name``, ``ndim``, ``shape``, ``dims``,
            ``fillvalue`` and ``attrs`` are used
        data_sel :
            None to read everything, otherwise a selection applied to the
            dataset (a slice or a tuple of slices/integers)
        datapoint : bool
            when true, the fields 'value' and 'error' of the dataset are
            read instead of the dataset itself

        Raises
        ------
        ValueError
            when the dataset does not have 1, 2 or 3 dimensions
        """
        # the object is named after the last component of the dataset path
        self.name = PurePath(h5_dset.name).name

        # copy dataset values (and error) to object
        if data_sel is None:
            if datapoint:
                self.value = h5_dset['value']
                self.error = h5_dset['error']
            else:
                self.value = h5_dset[...]
        else:
            # we need to keep all dimensions to get the dimensions
            # of the output data right: every integer entry of a tuple
            # selection gets a length-1 axis re-inserted at its position
            if datapoint:
                self.value = h5_dset['value'][data_sel]
                self.error = h5_dset['error'][data_sel]
                if isinstance(data_sel, tuple):
                    for ii, elmnt in enumerate(data_sel):
                        if isinstance(elmnt, (int, np.int64)):
                            self.value = np.expand_dims(self.value, axis=ii)
                            self.error = np.expand_dims(self.error, axis=ii)
            else:
                self.value = h5_dset[data_sel]
                if isinstance(data_sel, tuple):
                    for ii, elmnt in enumerate(data_sel):
                        if isinstance(elmnt, (int, np.int64)):
                            self.value = np.expand_dims(self.value, axis=ii)

        # set default dimension names
        if h5_dset.ndim == 1:
            keys_default = ['column']
        elif h5_dset.ndim == 2:
            keys_default = ['row', 'column']
        elif h5_dset.ndim == 3:
            keys_default = ['time', 'row', 'column']
        else:
            # also reached for any other ndim that is not 1, 2 or 3
            raise ValueError('not implemented for ndim > 3')

        # copy all dimensions with size longer than 1
        keys = []
        dims = []
        for ii in range(h5_dset.ndim):
            if self.value.shape[ii] == 1:
                continue

            # exactly one scale must be attached to the dimension, else fall
            # back to the default name and a plain index range
            if len(h5_dset.dims[ii]) != 1:  # bug in some KMNI HDF5 files
                keys.append(keys_default[ii])
                dims.append(np.arange(self.value.shape[ii]))
            elif self.value.shape[ii] == h5_dset.shape[ii]:
                # the axis was read completely: use the attached scale as is;
                # only the first whitespace-separated word of its name is
                # used as key
                buff = PurePath(h5_dset.dims[ii][0].name).name
                if len(buff.split()) > 1:
                    buff = buff.split()[0]
                keys.append(buff)
                if h5_dset.dims[ii][0][:].size == h5_dset.shape[ii]:
                    buff = h5_dset.dims[ii][0][:]
                    # an all-zero scale is replaced by an index range
                    if np.all(buff == 0):
                        buff = np.arange(buff.size)
                else:  # bug in some KMNI HDF5 files
                    buff = np.arange(h5_dset.shape[ii])
                dims.append(buff)
            else:
                # only part of the axis was read: build the scale as above,
                # then apply the matching part of the selection to it
                buff = PurePath(h5_dset.dims[ii][0].name).name
                if len(buff.split()) > 1:
                    buff = buff.split()[0]
                keys.append(buff)
                if h5_dset.dims[ii][0][:].size == h5_dset.shape[ii]:
                    buff = h5_dset.dims[ii][0][:]
                    if np.all(buff == 0):
                        buff = np.arange(buff.size)
                else:  # bug in some KMNI HDF5 files
                    buff = np.arange(h5_dset.shape[ii])

                # NOTE(review): len(data_sel) below is evaluated before the
                # tuple check, so a selection that is neither a slice nor
                # has a length (e.g. a plain int) would raise here -- confirm
                # whether such selections can occur.
                if isinstance(data_sel, slice):
                    dims.append(buff[data_sel])
                elif len(data_sel) == h5_dset.ndim:
                    dims.append(buff[data_sel[ii]])
                elif not isinstance(data_sel, tuple):
                    dims.append(buff[data_sel])
                # NOTE(review): when ii == len(data_sel) this test is false
                # and the else branch indexes data_sel[ii], which is out of
                # range for a tuple -- confirm whether '>=' was intended.
                elif ii > len(data_sel):
                    dims.append(buff[data_sel[-1]])
                else:
                    dims.append(buff[data_sel[ii]])

        # add dimensions as a namedtuple
        coords_namedtuple = namedtuple('Coords', keys)
        self.coords = coords_namedtuple._make(dims)

        # remove all dimensions with size equal 1 from value (and error)
        self.value = np.squeeze(self.value)
        if datapoint:
            self.error = np.squeeze(self.error)

        # copy FillValue (same for value/error in a datapoint)
        if datapoint:
            self.fillvalue = h5_dset.fillvalue[0]
        else:
            self.fillvalue = h5_dset.fillvalue

        # copy its units; bytes are decoded, a one-element array is unwrapped
        if 'units' in h5_dset.attrs:
            if isinstance(h5_dset.attrs['units'], np.ndarray):
                if h5_dset.attrs['units'].size == 1:
                    self.units = h5_dset.attrs['units'][0]
                    if isinstance(self.units, bytes):
                        self.units = self.units.decode('ascii')
                else:
                    self.units = h5_dset.attrs['units']
                    if isinstance(self.units[0], bytes):
                        self.units = self.units.astype(str)
            else:
                self.units = h5_dset.attrs['units']
                if isinstance(self.units, bytes):
                    self.units = self.units.decode('ascii')

        # copy its long_name (decoded when stored as bytes)
        if 'long_name' in h5_dset.attrs:
            if isinstance(h5_dset.attrs['long_name'], bytes):
                self.long_name = h5_dset.attrs['long_name'].decode('ascii')
            else:
                self.long_name = h5_dset.attrs['long_name']
Ejemplo n.º 8
0
import sys
import os
from pathlib import PurePath
from pathlib import PureWindowsPath

def dated_filename(filename):
    """Return *filename* with a ``YYYY-MM-DD_`` prefix added to its name.

    The date comes from the first ``_`` separated part of the file name that
    contains ``dat``: characters 3-6 of that part are the year, 7-8 the month
    and 9-10 the day (for example ``dat20210304``).  The directory part of
    the path is kept unchanged.

    :param filename: path of the file to rename
    :return: the new path as a string, or None when no part of the name
        contains ``dat``
    """
    path = PurePath(filename)
    tokens = [part for part in path.name.split("_") if 'dat' in part]
    if not tokens:
        return None
    stamp = tokens[0]
    prefix = stamp[3:7] + "-" + stamp[7:9] + "-" + stamp[9:11] + "_"
    # with_name keeps the parent directory and uses the platform separator,
    # so a file in a drive root does not end up with a doubled separator.
    return str(path.with_name(prefix + path.name))


# Script body: every command-line argument (e.g. files dropped onto the
# script) is renamed in place.
if len(sys.argv) == 1:
    print("No files imported - Please drag and drop your files")

else:
    print('Number of given files:', len(sys.argv) - 1, 'Files')

    for filename in sys.argv[1:]:
        newfilename = dated_filename(filename)
        if newfilename is None:
            # No date token in the name: report it and keep going instead of
            # aborting the whole batch.
            print("Skipped (no 'dat' part in file name):")
            print(filename)
            print("---------")
            continue

        print("Old Filename:")
        print(filename)

        print("New Filename:")
        print(newfilename)
        print("---------")
        try:
            os.rename(filename, newfilename)
        except OSError as err:
            # One failing file must not stop the remaining renames
            print("Rename failed:", err)
Ejemplo n.º 9
0
    def transform(self):
        """Run OCR over the PDF file(s) named by ``self.input_path``.

        ``self.input_path`` may name a single ``.pdf`` file or a directory
        (a path without extension) whose ``*.pdf`` files are all processed.
        A relative path is tried against the current working directory
        first; if that fails the whole filesystem is searched for it.

        Each PDF is rendered by Ghostscript to ``<name>.tiff`` next to the
        PDF, Tesseract turns that into ``<name>_new.pdf``, and the TIFF is
        removed again.  ``<name>`` is the file name up to its first dot.

        Returns:
            The string ``"1"`` once all files have been handled.

        Raises:
            FileNotFoundError: the input does not exist or holds no PDF, a
                required tool is missing (POSIX), or a file disappeared
                during processing.
            ValueError: the input has an extension other than ``.pdf``.
            PermissionError: an operation was refused by the OS.
            SystemExit: a required tool is missing (Windows) or the platform
                is neither Windows nor POSIX.
        """
        import shutil  # function-scope import: only needed by this method

        gs_options = ['-q', '-r300', '-dNOPAUSE', '-sDEVICE=tiffgray',
                      '-dBATCH', '-dINTERPOLATE']

        def pdfs_in(directory):
            """List the ``*.pdf`` files directly inside *directory*."""
            return glob.glob(directory + "/*.pdf")

        def search_filesystem(root, sep, wanted, suffix):
            """Walk *root* for the relative path *wanted* (last resort)."""
            found = []
            parts = wanted.split(sep)
            # '' for a bare file name, which is contained in every folder
            parent_hint = sep.join(parts[:-1])
            for folder, _subdirs, filenames in os.walk(root):
                if suffix == '.pdf':
                    for filename in filenames:
                        if filename != parts[-1] or parent_hint not in folder:
                            continue
                        candidate = os.path.join(folder, filename)
                        if os.path.isfile(candidate):
                            found.append(candidate)
                elif wanted == folder[-len(wanted):]:
                    found += pdfs_in(folder)
            return found

        def locate_on_disk(exe_names):
            """Search the current drive for one of *exe_names*.

            Returns the last executable match as a string, or ''.
            """
            located = ''
            for folder, _subdirs, filenames in os.walk(os.path.realpath('\\')):
                for filename in filenames:
                    if filename in exe_names:
                        candidate = os.path.join(folder, filename)
                        if self.is_exe(candidate):
                            located = str(PurePath(candidate))
            return located

        def run_tool(label, argv, executable, cwd):
            """Run one external tool; return False if it could not start."""
            startupinfo = None
            if os.name == 'nt':
                # keep the console window of the child process hidden
                startupinfo = subprocess.STARTUPINFO()
                startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            try:
                proc = subprocess.Popen(argv,
                                        executable=executable,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT,
                                        cwd=cwd,
                                        startupinfo=startupinfo)
                # communicate() drains the pipe while waiting; wait() alone
                # can block forever once the pipe buffer fills up
                output, _ = proc.communicate()
            except Exception:
                print("Error while running %s subprocess. Traceback:" % label)
                print("Traceback:\n%s" % traceback.format_exc())
                return False
            # stderr is merged into stdout, so report the combined output
            # whenever the tool signals failure
            if proc.returncode != 0 and output:
                print("%s stderr:\n'%s'" % (label, output))
            return True

        def ocr_pdf(pdf_path, gs_executable, tesseract_executable):
            """Convert one PDF: Ghostscript -> TIFF -> Tesseract -> PDF."""
            directory = str(PurePath(pdf_path).parent)
            pdf_name = PurePath(pdf_path).name
            base = pdf_name.split('.')[0]
            tiff_name = base + '.tiff'
            if gs_executable is None:
                gs_name = 'gs'
            else:
                gs_name = PurePath(gs_executable).name.split('.')[0]

            # Arguments are passed as a list so that file names containing
            # spaces or quotes reach the tools unchanged.
            gs_argv = ([gs_name] + gs_options +
                       ['-o', tiff_name, '-f', pdf_name, '-c', 'quit'])
            if not run_tool('Ghostscript', gs_argv, gs_executable, directory):
                return

            tesseract_argv = ['tesseract', tiff_name, base + '_new', 'pdf']
            run_tool('Tesseract', tesseract_argv, tesseract_executable,
                     directory)

            os.remove(os.path.join(directory, tiff_name))

        try:
            inpath = self.input_path
            file_or_dir = os.path.splitext(inpath)[1]
            relative = not os.path.isabs(inpath)
            resolved = os.path.realpath(inpath)
            if relative:
                candidates = [os.path.join(os.getcwd(), inpath), resolved]
            else:
                candidates = [resolved]

            pdf_paths = []
            matched = False
            for candidate in candidates:
                if file_or_dir == '' and os.path.isdir(candidate):
                    pdf_paths = pdfs_in(candidate)
                    matched = True
                elif file_or_dir == '.pdf' and os.path.isfile(candidate):
                    pdf_paths = [candidate]
                    matched = True
                if matched:
                    break

            # Only a relative '.pdf' or extension-less path can be found by
            # the filesystem search, so other inputs skip the walk.
            if relative and not matched and file_or_dir in ('', '.pdf'):
                if os.name == 'nt':
                    pdf_paths = search_filesystem(os.path.realpath('\\'),
                                                  '\\', inpath, file_or_dir)
                elif os.name == 'posix':
                    pdf_paths = search_filesystem('/', '/', inpath,
                                                  file_or_dir)

            if not pdf_paths:
                if file_or_dir == '':
                    raise FileNotFoundError(
                        "This is either a non-existent directory or no pdf file exists in this directory"
                    )
                elif file_or_dir == '.pdf':
                    raise FileNotFoundError("This is an non-existent pdf file")
                else:
                    raise ValueError("Only pdf files are acceptable as input")

            if os.name == 'nt':
                gs_names = ('gswin64c.exe', 'gswin32c.exe')
                dirg = ''
                for gs_candidate in gs_names:
                    on_path = self.which_win(gs_candidate)
                    if on_path is not None:
                        dirg = str(PurePath(on_path))
                        break
                if not dirg:
                    dirg = locate_on_disk(gs_names)
                if not dirg:
                    sys.exit(
                        "Please install Ghostscript, if you want to complete this task"
                    )

                on_path = self.which_win('tesseract.exe')
                if on_path is not None:
                    dirt = str(PurePath(on_path))
                else:
                    dirt = locate_on_disk(('tesseract.exe',))
                if not dirt:
                    sys.exit(
                        "Please install Tesseract-OCR, if you want to complete this task"
                    )

            elif os.name == 'posix':
                # On POSIX the tools are started by name, so they only have
                # to be present on PATH.
                if shutil.which('tesseract') is None:
                    raise FileNotFoundError(
                        "Please install Tesseract-OCR, if you want to complete this task"
                    )
                if shutil.which('gs') is None:
                    raise FileNotFoundError(
                        "Please install Ghostscript, if you want to complete this task"
                    )
                dirg = None
                dirt = None

            else:
                sys.exit('Only for Windows or Linux')

            for pdf_path in pdf_paths:
                ocr_pdf(pdf_path, dirg, dirt)

        except OSError as error:
            # Errors raised above with only a message have errno None and
            # are re-raised untouched by the final branch.
            if error.errno == errno.ENOENT:
                raise FileNotFoundError(error.errno, error.strerror,
                                        error.filename) from error
            elif error.errno in [errno.EPERM, errno.EACCES]:
                raise PermissionError(error.errno, error.strerror,
                                      error.filename) from error
            else:
                raise

        return "1"