def save(self, filepath: str, model_name: str):
    """Dump ``self.model`` to ``<filepath><model_name>.pkl``.

    The target path is built by plain string concatenation, so
    ``filepath`` must already end with a path separator if it names a
    directory.

    Args:
        filepath: Prefix of the output path.
        model_name: File name without the ``.pkl`` extension.

    Returns:
        Whatever ``Util.dump`` returns.
    """
    model = self.model
    destination = "".join((filepath, model_name, ".pkl"))
    return Util.dump(model, destination)
def _feature_cache_path(block, filepath: str, task: str) -> str:
    """Return the ``.pkl`` path under ``<filepath>features/`` used to cache ``block``'s output."""
    stem = f"{task}_{block.__class__.__name__}"
    if hasattr(block, "cols"):
        stem += f"_{block.cols!s}"
        # name / n_components only take part in the file name when the
        # block also exposes ``cols``.
        if hasattr(block, "name") and hasattr(block, "n_components"):
            stem += f"_{block.name!s}_{block.n_components!s}"
    return os.path.join(filepath + "features/", f"{stem}.pkl")


def run_blocks(input_df: pd.DataFrame,
               blocks: List,
               y=None,
               preprocess_block=None,
               logger=None,
               filepath: str = "./",
               task: str = "train",
               save_feature: bool = False) -> pd.DataFrame:
    """Run every feature block on ``input_df`` and join the results column-wise.

    Each block's output columns get the suffix ``@<BlockClassName>`` before
    being concatenated along ``axis=1``.

    Args:
        input_df (pd.DataFrame): original DataFrame.
        blocks (List): feature blocks. ``block.fit`` is called when
            ``task == "train"``, ``block.transform`` for any other task.
        y (optional): accepted for interface compatibility; not used by
            this function. Defaults to None.
        preprocess_block (optional): callable applied to ``input_df`` before
            any block runs (for example a fillna step). Defaults to None.
        logger (optional): forwarded to ``Timer``. Defaults to None.
        filepath (str, optional): prefix of the feature cache directory
            ``<filepath>features/``. It is concatenated as-is, so it should
            end with a path separator. Defaults to "./".
        task (str, optional): ``"train"`` selects ``fit``; anything else
            selects ``transform``. Also used in cache file names and log
            prefixes. Defaults to "train".
        save_feature (bool, optional): when True, a block's output is loaded
            from its cache file if that file exists, otherwise computed and
            dumped there. Defaults to False.

    Returns:
        pd.DataFrame: the concatenated block outputs (an empty DataFrame when
        ``blocks`` is empty).

    Raises:
        AssertionError: if a block's output length differs from the length of
            the (preprocessed) input.
    """
    if preprocess_block is not None:
        input_df = preprocess_block(input_df)
    _input_df = input_df.copy()

    if save_feature:
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(filepath + "features", exist_ok=True)

    print(decorate(f"start create block for {task}"))

    # Collect the per-block frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration. The empty seed
    # frame keeps the result an empty DataFrame when there are no blocks.
    feature_frames = [pd.DataFrame()]

    with Timer(logger=logger, prefix=f'create {task} block'):
        for block in blocks:
            file_name = (_feature_cache_path(block, filepath, task)
                         if save_feature else None)

            with Timer(logger=logger, prefix='\t- {}'.format(str(block))):
                if save_feature and os.path.isfile(file_name):
                    # Cache hit: reuse the previously dumped output.
                    out_i = Util.load(file_name)
                else:
                    if task == "train":
                        out_i = block.fit(_input_df)
                    else:
                        out_i = block.transform(_input_df)
                    if save_feature:
                        Util.dump(out_i, file_name)

            # Sanity check: every block must return one row per input row.
            assert len(input_df) == len(out_i), block
            name = block.__class__.__name__
            feature_frames.append(out_i.add_suffix(f'@{name}'))

        out_df = pd.concat(feature_frames, axis=1)
    return out_df