def writeFile(self, key, val, file_type):
    """Persist ``val`` in the file cache under ``key`` and hand ``val`` back.

    Only a ``pandas`` DataFrame/Series, or a tuple made up solely of them,
    is written. Anything else is logged and returned untouched.

    Args:
        key: Cache key; forwarded to ``self.get_path`` to locate the file.
        val: The value to cache.
        file_type: ``'h5'`` stores every tuple member under
            ``f'{self.df_key}_{index}'`` in one HDF file; ``'pickle'``
            pickles ``val`` as a whole. Other values write nothing.

    Returns:
        ``None`` when the cache is disabled (``self.enable`` is falsy),
        otherwise ``val`` itself, whether or not it was written.
    """
    if not self.enable:
        logger.debug('Cache is disable')
        return None

    # Only sized values can be "empty". Scalars such as int/float have no
    # len(), and must not crash the cache layer with a TypeError.
    try:
        is_empty = val is None or len(val) == 0
    except TypeError:
        is_empty = False
    if is_empty:
        logger.debug('Return value is None or empty')
        return val

    val_tuple = val if isinstance(val, tuple) else (val,)

    if not all(isinstance(item, (pd.DataFrame, pd.Series)) for item in val_tuple):
        logger.warning(f'The return is not DataFrame or it is None:{[ isinstance(item, pd.DataFrame) for item in val_tuple]}')
        return val

    path = self.get_path(key, file_type)
    if file_type == 'h5':
        for index, df in enumerate(val_tuple):
            # Separate name so the ``key`` parameter is not overwritten.
            hdf_key = f'{self.df_key}_{index}'
            logger.debug(f"====Write {len(df)} records to File#{path}, with:{hdf_key}")
            # ``key`` is passed by keyword: newer pandas deprecates passing
            # it positionally.
            df.to_hdf(path, key=hdf_key)
    elif file_type == 'pickle':
        pd.to_pickle(val, path)
    else:
        # Previously an unknown type was skipped without any trace.
        logger.warning(f'Unsupported file_type:{file_type}, nothing is written for key:{key}')
    return val
def wrapper(*args, **kwargs):
    """Call ``fn`` and pass its pandas result through ``_reduce_mem_usage``.

    A single DataFrame is reduced directly. A tuple whose members are all
    DataFrame/Series is reduced member by member and rebuilt as a tuple.
    Any other return type is left as is and reported with a warning.

    Returns:
        The (possibly reduced) return value of ``fn``.
    """
    val = fn(*args, **kwargs)
    with timed_bolck(f'Reduce_Mem({fn.__name__}:{ex_type_name(val)})'):
        if isinstance(val, pd.DataFrame):
            val = _reduce_mem_usage(val, verbose=True)
        # ``elif``: with two independent ``if`` statements a plain DataFrame
        # fell through to the ``else`` branch and logged a bogus warning.
        elif isinstance(val, tuple) and all(
                isinstance(df, (pd.DataFrame, pd.Series)) for df in val):
            val = tuple(_reduce_mem_usage(df, verbose=True) for df in val)
        else:
            logger.warning(f'The return type for fun#{fn.__name__} is:{type(val)}')
    return val
def wrapper(*args, **kwargs):
    """Return the result of ``f``, using the file cache when possible.

    The cache key is built from the function name plus the reduced
    ("mini") representation of the positional and keyword arguments, with
    ``.`` and ``/`` replaced by ``_`` and runs of ``_`` collapsed. If
    ``prefix`` is set it is prepended to the key.

    Behaviour:
        * Arguments rejected by ``is_support_cache`` bypass the cache.
        * With a falsy ``overwrite`` the cache is read first and ``f`` is
          only called on a miss (``readFile`` returning ``None``).
        * With a truthy ``overwrite`` ``f`` is always called.
        * Every fresh result is handed to ``cache.writeFile``.

    Returns:
        The cached value, or the freshly computed return value of ``f``.
    """
    mini_args = get_mini_args(args)
    mini_kwargs = get_mini_args(kwargs)

    # Ignore the file cache if the input parameters don't support caching.
    if not is_support_cache(*args, **kwargs):
        # Log the reduced kwargs, like the args, rather than the raw values.
        logger.warning(f'Do not support cache for fn:{f.__name__}, para:{str(mini_args)}, kw:{str(mini_kwargs)}')
        return f(*args, **kwargs)

    key = '='.join([f.__name__, str(mini_args), str(mini_kwargs)])
    key = key.replace('.', '_')
    key = key.replace('/', '_')
    while '__' in key:
        key = key.replace('__', '_')
    if prefix:
        key = '_'.join([prefix, key])

    # Bind ``val`` up front: previously it stayed unbound whenever
    # ``overwrite`` was not exactly ``False``, and the check below raised
    # UnboundLocalError.
    val = None
    if not overwrite:
        # NOTE(review): ``type`` is a name from the enclosing scope that
        # shadows the builtin; presumably the cache file type - confirm.
        val = cache.readFile(key, type)

    if val is None:
        # Cache miss or forced overwrite: call the wrapped function and
        # save the result in the cache.
        val = f(*args, **kwargs)
        cache.writeFile(key, val, type)
    return val
import matplotlib as plt import pandas as pd from sklearn.preprocessing import LabelEncoder from file_cache.utils.util_log import logger, timed try: plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 pd.set_option('display.height', 1000) pd.set_option('display.max_rows', 1000) pd.set_option('display.max_columns', 500) pd.set_option('display.width', 1000) except Exception as e: logger.warning(e) pd.options.mode.use_inf_as_na = True @timed(logger) def convert_label_encode(sample, exclude=[]): try: #Label encode obj_col = sample.select_dtypes(include=['object']).columns obj_col = [ item for item in obj_col if item != 'device' and item not in exclude ] print(f'{obj_col} will convert to label encode, and fillna with Other')