Example #1
0
def get_discretizer(x,
                    y,
                    continuous_features=None,
                    seed=None,
                    min_depth=0) -> MDLP:
    """Build an MDLP discretizer and fit it on ``x`` / ``y``.

    Args:
        x: Feature data, forwarded unchanged to ``MDLP.fit``.
        y: Target labels, forwarded unchanged to ``MDLP.fit``.
        continuous_features: ``None``, a sequence of column indices, or a
            boolean mask. A boolean mask is converted to the positions of
            its ``True`` entries before being handed to ``fit``; any other
            value is forwarded as given.
        seed: Passed to ``MDLP`` as ``random_state``.
        min_depth: Passed to ``MDLP`` as ``min_depth``.

    Returns:
        The ``MDLP`` instance after ``fit`` has been called on it.
    """
    discretizer = MDLP(random_state=seed, min_depth=min_depth)
    if continuous_features is not None:
        # View the argument as an array so plain lists/tuples work too, and
        # compare against ``np.bool_``: the ``np.bool`` alias was deprecated
        # in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        feature_array = np.asarray(continuous_features)
        if feature_array.dtype == np.bool_:
            # Translate the mask into the integer positions it selects.
            continuous_features = np.arange(
                len(feature_array))[feature_array]
    discretizer.fit(x, y, continuous_features)
    return discretizer
def num2cate_fit(df, min=2):
    '''
    Fit an MDLP discretizer on the numeric feature columns of a dataframe.

    Arg
        df (pandas DataFrame): the last column is used as the class label
        (the original documentation requires it to be int 0 or 1); all
        preceding columns are candidate features.
        min (int): forwarded to MDLP as `min_depth`, the minimum depth of
        the interval splitting. Per the original documentation it overrides
        the MDLP stopping criterion, and the algorithm stops early if the
        entropy of an interval reaches zero before `min_depth`.
    Return
        mdlp (MDLP instance): the fitted transformer, usable to transform
        samples.
    '''
    labels = df.iloc[:, -1].values

    # Only int64/float64 feature columns are treated as continuous.
    numeric_part = df.iloc[:, :-1].select_dtypes(include=['int64', 'float64'])
    # Sort the names so fit and transform see the features in the same order.
    ordered_columns = sorted(numeric_part.columns.tolist())

    samples = df[ordered_columns].values  # plain numpy arrays for MDLP
    fitted = MDLP(min_depth=min)
    fitted.fit(samples, labels)
    return fitted
 def get_raw_bins(column, target):
     """Fit a default MDLP on ``column`` / ``target`` and return its cut points.

     Only the first entry of the fitted ``cut_points_`` attribute is used,
     so the result describes the first (presumably only) feature passed in.

     Returns:
         A list built from ``cut_points_[0]``.
     """
     # Relies on ``fit`` returning the fitted estimator, as the original did.
     fitted = MDLP().fit(column, target)
     first_feature_cuts = fitted.cut_points_[0]
     return list(first_feature_cuts)