Ejemplo n.º 1
0
 def save(self, filepath: str, model_name: str):
     """Dump ``self.model`` with ``Util.dump`` and return that call's result.

     The destination is ``filepath`` immediately followed by ``model_name``
     and the ``.pkl`` suffix. No path separator is inserted between the two,
     so ``filepath`` must already end with one if it names a directory.

     Args:
         filepath: Leading part of the destination path.
         model_name: File stem appended directly to ``filepath``.

     Returns:
         Whatever ``Util.dump`` returns.
     """
     destination = "".join((filepath, model_name, ".pkl"))
     return Util.dump(self.model, destination)
Ejemplo n.º 2
0
def _feature_cache_path(block, task: str, feature_dir: str) -> str:
    """Return the pickle path used to cache ``block``'s output for ``task``."""
    stem = f"{task}_{block.__class__.__name__}"
    if hasattr(block, "cols"):
        stem += f"_{block.cols!s}"
        # name / n_components only take part in the file name when the block
        # also exposes ``cols``.
        if hasattr(block, "name") and hasattr(block, "n_components"):
            stem += f"_{block.name!s}_{block.n_components!s}"
    return os.path.join(feature_dir, f"{stem}.pkl")


def run_blocks(input_df: pd.DataFrame,
               blocks: List,
               y=None,
               preprocess_block=None,
               logger=None,
               filepath: str = "./",
               task: str = "train",
               save_feature: bool = False) -> pd.DataFrame:
    """Run every feature block on ``input_df`` and join the outputs column-wise.

    Each block's output columns get the suffix ``@<BlockClassName>`` before
    being concatenated along ``axis=1``.

    Args:
        input_df: Original DataFrame.
        blocks: Feature blocks. Each must provide ``fit`` and ``transform``
            methods that accept a DataFrame and return an object with the
            same length as ``input_df`` that supports ``add_suffix``.
        y: Currently unused by this function.
        preprocess_block: Optional callable applied to ``input_df`` first
            (for example to fill missing values); its return value replaces
            ``input_df``.
        logger: Forwarded to ``Timer`` for the overall run and for each block.
        filepath: Prefix under which the ``features/`` cache directory lives.
            It is concatenated as-is (``filepath + "features/"``), so it
            should end with a path separator. Defaults to ``"./"``.
        task: ``"train"`` calls ``block.fit``; any other value calls
            ``block.transform``. Also used in log messages and cache file
            names. Defaults to ``"train"``.
        save_feature: When True, each block's output is loaded from its
            cached ``.pkl`` file if that file exists, otherwise computed and
            dumped there. Defaults to False.

    Returns:
        pd.DataFrame: The concatenated block outputs (an empty DataFrame when
        ``blocks`` is empty).

    Raises:
        AssertionError: If a block's output length differs from
            ``len(input_df)``.
    """
    if preprocess_block is not None:
        input_df = preprocess_block(input_df)
    _input_df = input_df.copy()

    # Plain concatenation is kept on purpose so existing cache locations
    # remain valid.
    feature_dir = filepath + "features/"
    if save_feature:
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(filepath + "features", exist_ok=True)

    print(decorate(f"start create block for {task}"))

    # Seeding with an empty frame keeps the result identical to the original
    # incremental concat, including the empty-``blocks`` case.
    feature_frames = [pd.DataFrame()]

    with Timer(logger=logger, prefix=f'create {task} block'):
        for block in blocks:
            cache_path = (_feature_cache_path(block, task, feature_dir)
                          if save_feature else None)

            with Timer(logger=logger, prefix='\t- {}'.format(str(block))):
                if save_feature and os.path.isfile(cache_path):
                    out_i = Util.load(cache_path)
                else:
                    if task == "train":
                        out_i = block.fit(_input_df)
                    else:
                        out_i = block.transform(_input_df)
                    if save_feature:
                        Util.dump(out_i, cache_path)

            # Every block must return exactly one row per input row.
            assert len(input_df) == len(out_i), block
            name = block.__class__.__name__
            feature_frames.append(out_i.add_suffix(f'@{name}'))

        # Concatenate once instead of once per block, which re-copied the
        # accumulated frame on every iteration.
        out_df = pd.concat(feature_frames, axis=1)

    return out_df