def num_bin(df: pd.DataFrame, cols: list = None, target: str = 'target', specials: list = None,
            bin_num_limit: int = 5, count_distr_limit: float = 0.05, sc_method='chimerge',
            non_mono_cols: list = None, init_bins=10, init_min_samples=0.05, init_method='chi',
            **kwargs):
    """Bin columns in three stages: coarse binning, monotonicity check, final bins.

    Columns listed in ``non_mono_cols`` are passed straight to ``woebin``.
    Every other column is first split with a ``Combiner``; the exported split
    points are then handed to ``monotonous_bin`` and the result is forwarded
    to ``woebin`` as ``breaks_list``.

    NOTE(review): ``woebin`` looks like scorecardpy's and ``Combiner`` like
    toad's; ``monotonous_bin`` is defined outside this block -- confirm.

    Args:
        df: Frame holding the feature columns and the target column.
        cols: Columns to bin. When empty or None, every column of ``df``
            except ``target`` is used.
        target: Name of the target column.
        specials: Special values. When truthy, the same object is assigned to
            every column in ``cols`` before being passed on.
        bin_num_limit: Forwarded to ``woebin``.
        count_distr_limit: Forwarded to ``woebin``.
        sc_method: Forwarded to ``woebin`` as ``method``.
        non_mono_cols: Columns that skip the Combiner / ``monotonous_bin``
            step. Empty or None means no column skips it.
        init_bins: Forwarded to ``Combiner.fit`` as ``n_bins``.
        init_min_samples: Forwarded to ``Combiner.fit`` as ``min_samples``.
        init_method: Forwarded to ``Combiner.fit`` as ``method``.
        **kwargs: Extra keyword arguments forwarded to ``Combiner.fit``.

    Returns:
        A ``(bind, ivd)`` tuple: ``bind`` maps each column to its ``woebin``
        result, ``ivd`` maps each column to the first unique value of that
        result's ``'total_iv'`` column.
    """
    if not cols:
        cols = df.columns.difference([target]).tolist()
    if specials:
        specials = {k: specials for k in cols}
    if not non_mono_cols:
        non_mono_cols = []

    # Keyword arguments shared by both woebin call sites.
    woebin_options = dict(
        dt=df,
        y=target,
        special_values=specials,
        bin_num_limit=bin_num_limit,
        count_distr_limit=count_distr_limit,
        method=sc_method,
        print_info=False,
    )

    bind, ivd = {}, {}
    t0 = time.process_time()
    for col in cols:
        if col in non_mono_cols:
            bind[col] = woebin(x=col, **woebin_options)[col]
        else:
            c = Combiner()
            c.fit(X=df[col], y=df[target], n_bins=init_bins,
                  min_samples=init_min_samples, method=init_method, **kwargs)
            init_points = c.export()[col]
            breaks_list = monotonous_bin(df=df, col=col, target=target,
                                         cutOffPoints=init_points,
                                         special_values=specials)
            bind[col] = woebin(x=col, breaks_list=breaks_list, **woebin_options)[col]
        ivd[col] = bind[col]['total_iv'].unique()[0]

    # process_time() already returns seconds; the previous "* 100 / 60"
    # scaling inflated the figure printed as "seconds".
    elapsed = time.process_time() - t0
    print(f'there are bing {len(cols)} using {int(elapsed)} seconds')
    return bind, ivd
def test_combiner_target_in_frame_kwargs():
    """Fit on the whole frame with the target given by keyword (``y='target'``).

    Checks that the element at index 1 of the exported entry for column
    ``'A'`` equals 6 when fitting with ``n_bins=4``.
    """
    exported = Combiner().fit(df, y='target', n_bins=4).export()
    split_points_a = exported['A']
    assert split_points_a[1] == 6
def test_combiner_step():
    """Fit only column ``'A'`` with ``method='step'`` and ``n_bins=4``.

    Checks that the element at index 1 of the exported entry for ``'A'``
    equals 4.5.
    """
    fitted = Combiner().fit(df['A'], method='step', n_bins=4)
    exported = fitted.export()
    split_points_a = exported['A']
    assert split_points_a[1] == 4.5
def test_combiner_export():
    """Fit on the whole frame with ``method='chi'`` and ``n_bins=4``.

    Checks that the first element of the exported entry for column ``'B'``
    is a list.
    """
    fitted = Combiner().fit(df, target, method='chi', n_bins=4)
    first_b_entry = fitted.export()['B'][0]
    assert isinstance(first_b_entry, list)