def main():
    """Run the StrategyRobot workflow for the "UTD 15+ DQ (%)" target.

    Reads ``dev.pkl`` and ``test.pkl`` from the current directory, treats
    every column of the development frame that is not in a fixed exclusion
    list as a candidate feature, and then calls, in order: book creation,
    control plots, k-best variable selection, and the rule optimization
    steps (search, expand, prune, select) followed by the performance
    output.

    Takes no arguments and returns nothing. Output locations are handed to
    the StrategyRobot objects as path prefixes built from the current
    working directory.
    """
    # NOTE: read_pickle unpickles arbitrary objects; only load trusted files.
    dev = pd.read_pickle("dev.pkl")
    test = pd.read_pickle("test.pkl")

    # Columns of ``dev`` that must not be offered as candidate features.
    # NOTE(review): the formula target "UTD_DQ15plus_Prin" and the weight
    # "LoanAmount" are not listed here, so they also end up on the
    # right-hand side of the formula -- confirm StrategyRobot handles that.
    excluded_columns = (
        "vintage",
        "PMI66007Rating",
        "BAE201405ProjLoss",
        "LossRate",
        "PMI66007LossRate",
        "M2_DQ15plus_Prin",
        "M3_DQ_Prin",
        "UTD_DQ_Prin",
        "PMI5ApprovalFlag",
    )
    features = []
    for column in dev.columns.tolist():
        if column not in excluded_columns:
            features.append(column)

    # Left-hand side is fixed; the right-hand side is every feature joined
    # with " + ".
    formula_lhs = "UTD_DQ15plus_Prin / LoanAmount : LoanAmount ~ "
    formula = formula_lhs + " + ".join(features)

    strb = StrategyRobot(dev, test, formula=formula, aggregate=False)

    book = strb.book_creation(
        nbins=10,
        nbins_monotone=20,
        monotone_sig_level_threshold=0.1,
        yname="UTD 15+ DQ (%)",
    )

    # NOTE(review): this prefix has no "/output" segment, unlike the two
    # prefixes used further down -- confirm that is intentional.
    control_plot_prefix = os.getcwd() + "/UTD_15plus_DQ_"
    book.make_control_plots(
        which_variables=features,
        ylim=(0, 4),
        rounding=1,
        canvas_size=(9, 5),
        output_dir=control_plot_prefix,
    )

    var_sel = strb.variable_selection(book)
    selection_prefix = os.getcwd() + "/output/UTD_15plus_DQ_"
    top_variable_book = var_sel.select_k_best(
        method="univariate",
        drop_correlated=True,
        drop_correlated_threshold=0.6,
        k_best=10,
        force_monotone=True,
        rounding=1,
        output_dir=selection_prefix,
    )

    optimize = strb.optimization(
        top_variable_book,
        n_init_vars=3,
        eval_on_penalized=True,
        patience=True,
    )
    # An extra ``force_granularity_delta = 0.999`` argument was left
    # disabled in the original call and stays disabled here.
    optimize.search_rules(decrease_frac=0.1, stopping_eff=0.02)
    optimize.expand_rules(increase_frac=0.1)
    optimize.prune_rules(tolerate_n_std=0.0)
    optimize.select_rules()

    performance_prefix = os.getcwd() + "/output/UTD_15plus_DQ_"
    optimize.performance_output(output_dir=performance_prefix)
def main():
    """Run book creation and k-best selection for the "16+ DQ at M2 (%)" target.

    Reads ``data_input.pkl`` and passes the same frame to StrategyRobot as
    both the development and the test sample. Every column except a fixed
    exclusion list is used as a candidate feature. After book creation and
    univariate k-best selection, the selected book is passed to
    ``make_top_variable_plot``.

    Takes no arguments and returns nothing; the shape of the input frame is
    printed to stdout.

    NOTE(review): SOURCE shows another ``main`` defined before this one; if
    both live in the same module, this later definition replaces it.
    """
    # NOTE: read_pickle unpickles arbitrary objects; only load trusted files.
    dev = pd.read_pickle("data_input.pkl")
    # Alternative inputs, currently disabled:
    # dev = pd.read_pickle("data_input_old.pkl")
    # dev = pd.read_pickle("data_input_new.pkl")

    # The test sample is the very same object as the development sample.
    test = dev

    # NOTE(review): the weight column "loanamount" is not excluded, so it is
    # also a feature term in the formula -- confirm that is intended.
    non_feature_columns = ('M2_16PlusDQ_Prin', 'state', 'group_label')
    features = []
    for column in dev.columns.tolist():
        if column not in non_feature_columns:
            features.append(column)
    # Alternative feature subsets, currently disabled:
    # features = ['FICO', 'PMI6_1']
    # features = ['BAC031_NumOpenBankcardTradesBalanceGT0ReptdLast6Mos', 'FICO']
    # features = features[300:]

    # Left-hand side is fixed; the right-hand side is every feature joined
    # with ' + '.
    formula_lhs = 'M2_16PlusDQ_Prin / loanamount : loanamount | group_label ~ '
    formula = formula_lhs + ' + '.join(features)

    print(dev.shape)

    strb = StrategyRobot(dev, test, formula=formula, aggregate=False)
    book = strb.book_creation(
        nbins=5,
        nbins_monotone=5,
        monotone_sig_level_threshold=0.4,
        yname='16+ DQ at M2 (%)',
    )

    # Control plots are currently disabled:
    # book.make_control_plots(which_variables = features,
    #                         ylim = (0, 2),
    #                         ylim_double_ratio = (0.5, 3.5),
    #                         rounding = 2,
    #                         canvas_size = (9, 5),
    #                         label_test_group = '11/15 - 02/16',
    #                         label_control_group = '08/15 - 10/15',
    #                         output_dir = os.getcwd() + '/output/16plus_DQ_')

    var_sel = strb.variable_selection(book)
    selection_prefix = os.getcwd() + '/output/16plus_DQ_'
    top_variable_book = var_sel.select_k_best(
        method='univariate',
        drop_correlated=True,
        drop_correlated_threshold=0.6,
        k_best=12,
        force_monotone=True,
        rounding=1,
        output_dir=selection_prefix,
    )

    make_top_variable_plot(top_variable_book)