예제 #1
0
def main():
    """Run the UTD 15+ DQ rule-search workflow on pickled dev/test frames.

    Steps, in order: load ``dev.pkl`` and ``test.pkl``, assemble a formula
    string from the ``dev`` columns that are not explicitly excluded, build
    a ``StrategyRobot``, create a book, write control plots, call
    ``select_k_best`` (``k_best=10``), then search, expand, prune and select
    rules before emitting the performance output. Returns nothing.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    dev_frame = pd.read_pickle("dev.pkl")
    test_frame = pd.read_pickle("test.pkl")

    # Columns that must not appear on the right-hand side of the formula.
    # NOTE(review): "UTD_DQ15plus_Prin" and "LoanAmount" (used on the
    # left-hand side below) are not listed here, so if they are columns of
    # dev they are also passed as predictors -- confirm this is intended.
    excluded = (
        "vintage",
        "PMI66007Rating",
        "BAE201405ProjLoss",
        "LossRate",
        "PMI66007LossRate",
        "M2_DQ15plus_Prin",
        "M3_DQ_Prin",
        "UTD_DQ_Prin",
        "PMI5ApprovalFlag",
    )
    predictors = []
    for column in dev_frame.columns.tolist():
        if column in excluded:
            continue
        predictors.append(column)

    lhs = "UTD_DQ15plus_Prin / LoanAmount : LoanAmount ~ "
    rhs = " + ".join(predictors)
    robot = StrategyRobot(dev_frame, test_frame, formula=lhs + rhs, aggregate=False)

    binning = {
        "nbins": 10,
        "nbins_monotone": 20,
        "monotone_sig_level_threshold": 0.1,
        "yname": "UTD 15+ DQ (%)",
    }
    book = robot.book_creation(**binning)

    # NOTE(review): this prefix has no "/output" segment, unlike the two
    # output_dir values further down -- confirm whether that is deliberate.
    plot_prefix = os.getcwd() + "/UTD_15plus_DQ_"
    book.make_control_plots(
        which_variables=predictors,
        ylim=(0, 4),
        rounding=1,
        canvas_size=(9, 5),
        output_dir=plot_prefix,
    )

    selector = robot.variable_selection(book)
    selection_options = {
        "method": "univariate",
        "drop_correlated": True,
        "drop_correlated_threshold": 0.6,
        "k_best": 10,
        "force_monotone": True,
        "rounding": 1,
        "output_dir": os.getcwd() + "/output/UTD_15plus_DQ_",
    }
    best_book = selector.select_k_best(**selection_options)

    optimizer = robot.optimization(
        best_book,
        n_init_vars=3,
        eval_on_penalized=True,
        patience=True,
    )
    # Rule pipeline; the call order is kept exactly as in the original script.
    optimizer.search_rules(decrease_frac=0.1, stopping_eff=0.02)
    optimizer.expand_rules(increase_frac=0.1)
    optimizer.prune_rules(tolerate_n_std=0.0)
    optimizer.select_rules()

    report_prefix = os.getcwd() + "/output/UTD_15plus_DQ_"
    optimizer.performance_output(output_dir=report_prefix)
예제 #2
0
def main():
    """Run book creation and variable selection for the 16+ DQ at M2 target.

    Loads ``data_input.pkl`` and passes the same frame to ``StrategyRobot``
    as both the dev and the test argument. Builds a formula string from the
    non-excluded columns, prints the frame's shape, creates a book, calls
    ``select_k_best`` (``k_best=12``) and hands the result to
    ``make_top_variable_plot``. Returns nothing.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    frame = pd.read_pickle("data_input.pkl")

    # Columns kept out of the predictor list.
    # NOTE(review): 'loanamount' appears on the left-hand side of the formula
    # but is not excluded here -- confirm that is intended.
    skipped = ['M2_16PlusDQ_Prin', 'state', 'group_label']
    predictors = []
    for name in frame.columns.tolist():
        if name not in skipped:
            predictors.append(name)

    target_spec = 'M2_16PlusDQ_Prin / loanamount : loanamount | group_label ~ '
    formula = target_spec + ' + '.join(predictors)

    print(frame.shape)

    # The same frame object is supplied for both positional arguments.
    robot = StrategyRobot(frame, frame, formula=formula, aggregate=False)

    book = robot.book_creation(
        nbins=5,
        nbins_monotone=5,
        monotone_sig_level_threshold=0.4,
        yname='16+ DQ at M2 (%)',
    )

    # Control plots (book.make_control_plots) are not generated in this run.

    selector = robot.variable_selection(book)
    output_prefix = os.getcwd() + '/output/16plus_DQ_'
    best_book = selector.select_k_best(
        method='univariate',
        drop_correlated=True,
        drop_correlated_threshold=0.6,
        k_best=12,
        force_monotone=True,
        rounding=1,
        output_dir=output_prefix,
    )

    make_top_variable_plot(best_book)