def collect_files(self, units: list = None) -> list:
    """Collect files for all units

    Parameters
    ----------
    units : list, optional
        default all_units()

    Returns
    -------
    list
        flattened list of all files
    """
    units = f.as_list(units or all_units())

    # cap n_jobs at the lesser of cpu count and number of units
    n_jobs = min(multiprocessing.cpu_count(), len(units))

    # collect files per unit in parallel
    lst = Parallel(n_jobs=n_jobs, verbose=11)(
        delayed(self._collect_files_unit)(unit=unit) for unit in units)

    self.collected_files_dict = f.flatten_list_dict(lst)
    self.collected_files = f.flatten_list_list(
        [v for v in self.collected_files_dict.values()])

    # log total number of files, plus per-unit counts
    m_msg = {unit: len(items) for unit, items in self.collected_files_dict.items()}
    n = len(self.collected_files)
    log.info(f'Collected [{n}] files:\n{f.pretty_dict(m_msg, prnt=False)}')

    return self.collected_files
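# Usage sketch for collect_files. Hypothetical names: `proc` stands in for an
# instance of the owning processor class, and the unit ids are examples only.
#
#   proc = FileProcessor(ftype='fault')  # assumed constructor, not confirmed by source
#   files = proc.collect_files(units=['F301', 'F302'])
#   # files is the flattened list; per-unit lists live in proc.collected_files_dict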
def add_pictures(self, pics: list, **kw):
    """Add pictures to the doc on a new page under a 'Pictures' heading"""
    pics = f.as_list(pics)
    doc = self.doc

    doc.add_page_break()
    doc.add_heading('Pictures', level=2)

    for pic in pics:
        self.add_picture(pic=pic, **kw)
def process(self, units: list = None, lst: list = None) -> list:
    """Collect files (if not passed in) and dispatch to the ftype-specific processor"""
    units = f.as_list(units or all_units())
    lst = lst or self.collect_files(units=units)

    name = f'process_{self.ftype}'
    log.info(f'{name} - units: [{len(units)}], startdate: {self.d_lower}')

    proc_func = getattr(self, name)
    return proc_func(lst=lst)
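# Usage sketch for process (same hypothetical `proc` instance as above):
#
#   proc.process(units='F301')  # single unit is fine, f.as_list wraps str -> [str]
#   # dispatches to self.process_<ftype>, eg process_fault(lst=...)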
def safe_func(self, func: Callable, *args, **kw) -> Any:
    """Call func and reset db one time if failed (try to reconnect)

    Parameters
    ----------
    func : Callable
        function to wrap
    expected_exceptions : Union[Exception, List[Exception]]
        hidden kw, pass in to not suppress specific expected exceptions

    Returns
    -------
    Any
        result of sqlalchemy function call

    Raises
    ------
    er.SMSDatabaseError
        if second attempt fails
    """
    # always check for expected_exceptions in kw
    expected_exceptions = f.as_list(kw.pop('expected_exceptions', []))
    _func = functools.partial(func, *args, **kw)

    try:
        return _func()
    except Exception as e:
        # pyodbc.Error is raised as generic sqlalchemy.exc.DBAPIError
        # re-raise instead of suppressing if the exception was expected
        if type(e) in expected_exceptions:
            raise

        log.warning(f'type e: {type(e)}')
        if isinstance(e, exc.DBAPIError):
            log.warning(f'_message: {e._message}')

        log.warning(f'Failed db func (retrying): {func}, {e}')
        self.reset()

        # try one more time after reset
        try:
            return _func()
        except Exception as e:
            fail_msg = f'Failed db func: {func}\n\targs: {args}, kw: {kw}\n\troot error: {str(e)}'
            raise er.SMSDatabaseError(fail_msg) from e
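# Usage sketch for safe_func. Assumes `db` is the database wrapper exposing
# safe_func, and that integrity errors should propagate instead of triggering
# the reset-and-retry path:
#
#   res = db.safe_func(cursor.execute, sql, expected_exceptions=exc.IntegrityError)
#   # expected_exceptions is popped from kw up front, so it never reaches func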
def load_sections(self, secs: list):
    """Instantiate all sections passed in using getattr on this module

    Parameters
    ----------
    secs : list
        list or single item of section names (str), or dicts with extra init args
    """
    for sec in f.as_list(secs):
        # allow passing extra init args with a dict
        if not isinstance(sec, dict):
            sec = dict(name=sec)

        getattr(sys.modules[__name__], sec['name'])(report=self, **sec)
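# Usage sketch for load_sections (section names here are hypothetical):
#
#   report.load_sections(['UnitSMR', dict(name='FCSummary', include_history=True)])
#   # plain strings become dict(name=...); dict items pass extra keys to the section's init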
def subset_notnull(self, style: 'Styler', cols: Union[str, List[str]]) -> tuple:
    """Subset df column(s) to only not-null rows

    Parameters
    ----------
    style : Styler
    cols : Union[str, List[str]]

    Returns
    -------
    tuple
        pd.IndexSlice of (true/false mask where all rows in cols are not null, cols),
        for use as a Styler subset
    """
    cols = f.as_list(cols)
    return pd.IndexSlice[style.data[cols].notnull().all(axis=1), cols]
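# Usage sketch for subset_notnull, used while building a pandas Styler
# (`highlight_func` is a hypothetical styling function):
#
#   subset = self.subset_notnull(style, cols=['start_date', 'end_date'])
#   style.apply(highlight_func, subset=subset, axis=None)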
def add_attachments(self, lst_attach: List[str] = None) -> None:
    """Add multiple attachments to email

    Parameters
    ----------
    lst_attach : List[str], optional
        list of files to add, by default None
    """
    if lst_attach is None:
        return

    for p in f.as_list(lst_attach):
        try:
            self.add_attachment(p=p)
        except Exception:
            log.warning(f'Couldn\'t add attachment: {p}')
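# Usage sketch for add_attachments (paths are examples only):
#
#   email.add_attachments(lst_attach=['report.pdf', 'pics/dump_body.jpg'])
#   # failures are logged and skipped per file, never raised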
def process_files(
        ftype: str,
        units: list = None,
        search_folders: list = None,
        d_lower: dt = dt(2020, 1, 1),
        max_depth: int = 4,
        import_: bool = True,
        parallel: bool = True) -> Union[int, pd.DataFrame]:
    """Top level control function - pass in single unit or list of units

    1. Get list of files (plm, fault, dsc)
    2. Process - import plm/fault or 'fix' dsc eg downloads folder structure

    TODO - make this into a FileProcessor class
    """
    # avoid mutable default arg
    if search_folders is None:
        search_folders = ['downloads']

    if ftype == 'tr3':
        search_folders.append('vibe tests')  # bit sketch

    # assume ALL units
    # TODO: make this work for all minesites?
    units = f.as_list(units or all_units())
    search_folders = [item.lower() for item in search_folders]

    lst = []
    fl.drive_exists()

    for unit in units:
        p_unit = efl.UnitFolder(unit=unit).p_unit

        # start at downloads
        # could search more than just downloads folder (eg event too)
        lst_search = [x for x in p_unit.iterdir() if x.is_dir() and x.name.lower() in search_folders]

        for p_search in lst_search:
            lst.extend(FolderSearch(ftype, d_lower=d_lower, max_depth=max_depth).search(p_search))

        # process all dsc folders per unit as we find them
        if ftype == 'dsc':
            log.info(f'Processing dsc, unit: {unit} | dsc folders found: {len(lst)}')

            # group by "downloads/2021/F301 - 2021-01-01 - DLS" to avoid parallel collisions
            lst_grouped = [list(g) for _, g in itertools.groupby(
                lst, lambda p: fl.get_parent(p, 'downloads', offset=2).name)]

            def proc_dsc_batch(lst: List[Path]) -> None:
                """Process batch of dsc files that may be in the same top folder"""
                for p in lst:
                    dls.fix_dsc(p)

            Parallel(n_jobs=-1, verbose=11)(delayed(proc_dsc_batch)(lst=lst) for lst in lst_grouped)

            lst = []  # need to reset list per unit, only for dsc, this is a bit sketch
        elif ftype == 'tr3':
            for p in lst:
                dls.move_tr3(p=p)

            lst = []

    # collect all csv files for all units first, then import together
    if ftype in ('plm', 'fault'):
        log.info(f'num files: {len(lst)}')

        if lst:
            df = combine_csv(lst_csv=lst, ftype=ftype, d_lower=d_lower)
            return import_csv_df(df=df, ftype=ftype) if import_ else df
        else:
            return pd.DataFrame()  # return blank dataframe
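# Usage sketch for process_files. Returns the row count from import_csv_df when
# import_=True, else the combined DataFrame (unit id is an example only):
#
#   n_rows = process_files(ftype='plm', units=['F301'], d_lower=dt(2021, 1, 1))
#   df = process_files(ftype='fault', import_=False)  # inspect before importing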
def find_files_ext(p, extensions):
    """Recursively find files under p whose suffix matches extension(s), given without the dot"""
    extensions = f.as_list(extensions)
    return [p_ for p_ in p.rglob('*')
            if p_.suffix.lower().replace('.', '') in extensions and len(p_.suffix) > 0]
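# Usage sketch for find_files_ext (extensions are passed without the leading dot):
#
#   lst_csv = find_files_ext(Path('/path/to/downloads'), 'csv')
#   lst_pics = find_files_ext(p_unit, ['jpg', 'jpeg', 'png'])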