Example #1
    def collect_files(self, units: list = None) -> list:
        """Collect files for all units

        Parameters
        ----------
        units : list, optional
            units to collect files for, by default all_units()

        Returns
        -------
        list
            flattened list of all files
        """
        units = f.as_list(units or all_units())

        # cap n_jobs at the smaller of cpu count and number of units
        n_jobs = min(multiprocessing.cpu_count(), len(units))

        # parallel process collecting files per unit
        lst = Parallel(n_jobs=n_jobs, verbose=11)(delayed(self._collect_files_unit)(unit=unit) for unit in units)

        self.collected_files_dict = f.flatten_list_dict(lst)
        self.collected_files = f.flatten_list_list(list(self.collected_files_dict.values()))

        # log message for total number of files, and files per unit
        m_msg = {unit: len(items) for unit, items in self.collected_files_dict.items()}
        n = len(self.collected_files)
        log.info(f'Collected [{n}] files:\n{f.pretty_dict(m_msg, prnt=False)}')

        return self.collected_files
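
Every example on this page funnels its arguments through f.as_list to normalize a scalar, None, or a list into a plain list. The helper itself is not shown here; the following is only a minimal sketch of what it presumably does, not the actual implementation:

from typing import Any, List

def as_list(items: Any) -> List[Any]:
    """Hypothetical sketch: wrap a scalar in a list, pass collections through, map None to []"""
    if items is None:
        return []
    if isinstance(items, (list, tuple, set)):
        return list(items)
    return [items]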
Example #2
    def add_pictures(self, pics: list, **kw):
        """Add a page break, a 'Pictures' heading, then each picture in pics"""
        pics = f.as_list(pics)
        doc = self.doc
        doc.add_page_break()
        doc.add_heading('Pictures', level=2)

        for pic in pics:
            self.add_picture(pic=pic, **kw)
Example #3
    def process(self, units: list = None, lst: list = None) -> None:
        """Collect files (if not passed in) and dispatch to the process_<ftype> handler"""
        units = f.as_list(units or all_units())
        lst = lst or self.collect_files(units=units)

        name = f'process_{self.ftype}'
        log.info(f'{name} - units: [{len(units)}], startdate: {self.d_lower}')

        proc_func = getattr(self, name)
        proc_func(lst=lst)
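
process dispatches by file type: it looks up a method named process_<ftype> on the same object via getattr and hands it the collected file list. A minimal hedged sketch of the contract this assumes (class and method names here are illustrative, not the real implementation):

class FileProcessor:
    ftype = 'fault'

    def process_fault(self, lst: list) -> None:
        # process() resolves this handler as getattr(self, 'process_fault')
        for p in lst:
            print(f'importing fault csv: {p}')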
Example #4
    def safe_func(self, func: Callable, *args, **kw) -> Any:
        """Call func and reset db one time if failed (try to reconnect)

        Parameters
        ----------
        func : Callable
            function to wrap
        expected_exceptions : Union[Exception, List[Exception]]
            hidden kw (passed via **kw); exceptions of these exact types are re-raised
            immediately instead of being retried

        Returns
        -------
        Any
            result of sqlalchemy function call

        Raises
        ------
        er.SMSDatabaseError
            if second attempt fails
        """

        # always check for expected_exceptions in kws
        expected_exceptions = f.as_list(kw.pop('expected_exceptions', []))

        _func = functools.partial(func, *args, **kw)

        try:
            return _func()

        except Exception as e:
            # pyodbc.Error raised as generic sqlalchemy.exc.DBAPIError

            # allow not suppressing exception
            if type(e) in expected_exceptions:
                raise e
            else:
                log.warning(f'type e: {type(e)}')
                if isinstance(e, exc.DBAPIError):
                    log.warning(f'_message: {e._message}')

            log.warning(f'Failed db func (retrying): {func}, {e}')
            self.reset()

            # try one more time after reset
            try:
                return _func()
            except Exception as e:
                fail_msg = f'Failed db func: {func}\n\targs: {args}, kw: {kw}\n\troot error: {str(e)}'
                raise er.SMSDatabaseError(fail_msg) from e
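
A hedged usage sketch of safe_func, assuming db is the object exposing it and engine is a SQLAlchemy engine (both names, and the table/column queried, are illustrative). The query is retried once after a connection reset, but an IntegrityError is re-raised immediately because it is passed as an expected exception:

import sqlalchemy as sa

def read_units(db, engine):
    def _query():
        with engine.connect() as conn:
            return conn.execute(sa.text('SELECT Unit FROM UnitID')).fetchall()

    # retried once on generic DBAPI errors, but IntegrityError propagates untouched
    return db.safe_func(_query, expected_exceptions=sa.exc.IntegrityError)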
Example #5
    def load_sections(self, secs: list):
        """Instantiate all sections passed in using getattr on this module.

        Parameters
        ----------
        secs : list
            single item or list of sections to instantiate, each either:
            - str : section class name
            - dict : must contain 'name'; the full dict is forwarded as constructor kwargs
        """
        for sec in f.as_list(secs):
            # allow passing args with dict
            if not isinstance(sec, dict):
                sec = dict(name=sec)

            getattr(sys.modules[__name__], sec['name'])(report=self, **sec)
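
A short hedged usage example: sections can be passed as bare names or as dicts, where the whole dict (including 'name') is forwarded to the section constructor as keyword arguments. Section names and values below are illustrative:

report.load_sections([
    'UnitSMR',                                     # called with report=report, name='UnitSMR'
    dict(name='FCSummary', minesite='FortHills'),  # extra keys forwarded as constructor kwargs
])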
Example #6
    def subset_notnull(self, style: 'Styler', cols: Union[str, List[str]]) -> tuple:
        """Subset df column(s) to only not null rows

        Parameters
        ----------
        style : Styler
        cols : Union[str, List[str]]

        Returns
        -------
        tuple
            pd.IndexSlice of (boolean mask where all cols are not null, cols),
            for use as a Styler subset argument
        """
        cols = f.as_list(cols)
        return pd.IndexSlice[style.data[cols].notnull().all(axis=1), cols]
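
The returned IndexSlice is intended as the subset argument of a pandas Styler call, so a format only touches cells in rows where every listed column is populated. A hedged sketch (formats stands in for the object exposing subset_notnull):

import pandas as pd

df = pd.DataFrame({'a': [1.0, None, 3.0], 'b': [4.0, 5.0, None]})
style = df.style

subset = formats.subset_notnull(style, cols=['a', 'b'])

# highlight only the fully-populated rows in cols 'a' and 'b'
style = style.applymap(lambda v: 'background-color: yellow', subset=subset)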
Example #7
    def add_attachments(self, lst_attach: List[str] = None) -> None:
        """Add multiple attachments to email

        Parameters
        ----------
        lst_attach : List[str], optional
            list of files to add, by default None
        """
        if lst_attach is None:
            return

        for p in f.as_list(lst_attach):
            try:
                self.add_attachment(p=p)
            except Exception:
                log.warning(f'Couldn\'t add attachment: {p}')
Example #8
def process_files(
        ftype: str,
        units: list = None,
        search_folders: list = None,
        d_lower: dt = dt(2020, 1, 1),
        max_depth: int = 4,
        import_: bool = True,
        parallel: bool = True) -> Union[int, pd.DataFrame]:
    """
    Top level control function - pass in single unit or list of units
    1. Get list of files (plm, fault, dsc)
    2. Process - import plm/fault or 'fix' dsc eg downloads folder structure

    TODO - make this into a FileProcessor class
    """

    # default to just the downloads folder; copy so the caller's list (or a shared default) isn't mutated
    search_folders = [item.lower() for item in (search_folders or ['downloads'])]

    if ftype == 'tr3':
        search_folders.append('vibe tests')  # bit sketch

    # assume ALL units # TODO: make this work for all minesites?
    units = f.as_list(units or all_units())

    lst = []

    fl.drive_exists()
    for unit in units:
        p_unit = efl.UnitFolder(unit=unit).p_unit
        lst_search = [x for x in p_unit.iterdir() if x.is_dir() and x.name.lower()
                      in search_folders]  # start at downloads

        # could search more than just downloads folder (eg event too)
        for p_search in lst_search:
            lst.extend(FolderSearch(ftype, d_lower=d_lower, max_depth=max_depth).search(p_search))

        # process all dsc folders per unit as we find them
        if ftype == 'dsc':
            log.info(f'Processing dsc, unit: {unit} | dsc folders found: {len(lst)}')

            # group by "downloads/2021/F301 - 2021-01-01 - DLS" to avoid parallel collisions
            # NOTE: itertools.groupby only batches consecutive items, so lst is assumed to
            # already be ordered by top-level dsc folder
            lst_grouped = [list(g) for _, g in itertools.groupby(
                lst, lambda p: fl.get_parent(p, 'downloads', offset=2).name)]

            def proc_dsc_batch(lst: List[Path]) -> None:
                """Process batch of dsc files that may be in the same top folder"""
                for p in lst:
                    dls.fix_dsc(p)

            Parallel(n_jobs=-1, verbose=11)(delayed(proc_dsc_batch)(lst=lst) for lst in lst_grouped)
            # NOTE: when parallel processing is not wanted (eg calling with all_units), each
            # file could instead be processed in sequence with dls.fix_dsc(p=p) to avoid conflicts

            lst = []  # need to reset list, only for dsc, this is a bit sketch
        elif ftype == 'tr3':
            for p in lst:
                dls.move_tr3(p=p)

            lst = []

    # collect all csv files for all units first, then import together
    if ftype in ('plm', 'fault'):
        log.info(f'num files: {len(lst)}')
        if lst:
            df = combine_csv(lst_csv=lst, ftype=ftype, d_lower=d_lower)
            return import_csv_df(df=df, ftype=ftype) if import_ else df

        else:
            return pd.DataFrame()  # return blank dataframe
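
A hedged usage sketch of the top-level call: import PLM files for a couple of units (unit names illustrative), returning the combined DataFrame instead of importing it:

from datetime import datetime as dt

df = process_files(
    ftype='plm',
    units=['F301', 'F302'],
    d_lower=dt(2021, 1, 1),
    import_=False)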
Example #9
def find_files_ext(p: Path, extensions: Union[str, List[str]]) -> List[Path]:
    """Recursively find files under p whose extension (without the dot) is in extensions"""
    extensions = f.as_list(extensions)
    return [p_ for p_ in p.rglob('*')
            if p_.suffix and p_.suffix.lower().replace('.', '') in extensions]
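
A small hedged usage example; the path is illustrative. Extensions are matched case-insensitively and without the leading dot:

from pathlib import Path

p = Path('/path/to/downloads')
csvs = find_files_ext(p, 'csv')               # single extension
reports = find_files_ext(p, ['csv', 'xlsx'])  # or a list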