def leat_ai_raw(schema, train, test, s_option, nt, lift, z_beta):
    """Greedily accumulate one tree per alpha and log each coverage gain.

    ``train`` and ``test`` are stacked into a single data set. For every
    alpha in a fixed list, a tree is built on that data set with
    ``dt.create_decision_tree`` and applied back to it with
    ``dt.apply_rules``. The tree's predictions are kept only when adding
    them raises the fraction of rows whose accumulated prediction is > 0.

    Args:
        schema: Passed through unchanged to the ``dt`` helpers.
        train: First array handed to ``np.vstack``.
        test: Second array handed to ``np.vstack``.
        s_option: Unused by this function; kept for signature
            compatibility with ``leat``.
        nt: Unused by this function (the original code overwrote it with
            1 before reading it); kept for signature compatibility.
        lift: Multiplier applied to the result of
            ``dt.laplace_smoothing(data)`` to get the target probability.
        z_beta: Passed through unchanged to ``dt.create_decision_tree``.

    Returns:
        A list of ``[index, coverage]`` pairs, one per accepted tree, where
        ``index`` starts at 1 and ``coverage`` is the covered fraction
        right after that tree was accepted.
    """
    data = np.vstack((train, test))
    base_prob = dt.laplace_smoothing(data)
    obj_prob = lift * base_prob
    n_rows = len(data)

    alpha_list = [-1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 1.0,
                  1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]

    pred = np.zeros(n_rows)
    # Coverage of the accepted predictions so far. It starts at 0.0 because
    # ``pred`` is all zeros, and afterwards always equals the last accepted
    # ``cov_new``, so it does not need to be recomputed from ``pred``.
    cov_best = 0.0
    # Local counter; deliberately separate from the (ignored) ``nt`` argument.
    tree_no = 1
    output = []

    for alpha in alpha_list:
        # The -1 / True positional arguments are forwarded as in the original;
        # their meaning is defined by dt.create_decision_tree.
        tree = dt.create_decision_tree(data, schema, alpha, -1, True,
                                       obj_prob, z_beta)
        pred_added = pred + dt.apply_rules(data, schema, tree)
        cov_new = float(np.sum(pred_added > 0)) / n_rows

        # NOTE(review): the original indentation was lost; everything below
        # is assumed to run only when coverage strictly improves - confirm.
        if cov_new > cov_best:
            pred = pred_added
            cov_best = cov_new
            output.append([tree_no, cov_new])
            tree_no += 1
            # Same text as the old ``print nt, cov_new`` statement, but valid
            # on both Python 2 and Python 3. The counter is printed after the
            # increment, as before.
            print("%s %s" % (tree_no, cov_new))

    return output
def leat(schema, train, test, s_option, nt, lift, z_beta):
    """Compare coverage of alpha=1.0 trees against trees over many alphas.

    ``train`` and ``test`` are stacked into one data set. Each round draws
    a sample with ``sampling(data, s_option)``, builds a tree with alpha
    1.0 on it, and then one tree per entry of a fixed alpha list. Every
    tree is applied to the full stacked data set. The alpha-1.0 results
    are summed into one accumulator; all results (alpha 1.0 included) are
    summed into a second one.

    Args:
        schema: Passed through unchanged to the ``dt`` helpers.
        train: First array handed to ``np.vstack``.
        test: Second array handed to ``np.vstack``.
        s_option: Forwarded to ``sampling``.
        nt: Round limit; rounds stop once the round counter exceeds it.
        lift: Multiplier applied to the result of
            ``dt.laplace_smoothing(data)`` to get the target probability.
        z_beta: Passed through unchanged to ``dt.create_decision_tree``.

    Returns:
        A ``(cov_c45, cov_leat)`` tuple: the fraction of rows with a
        positive accumulated value in the alpha-1.0 accumulator and in the
        all-alphas accumulator, respectively.
    """
    data = np.vstack((train, test))
    obj_prob = lift * dt.laplace_smoothing(data)
    n_rows = len(data)

    # Alphas tried in addition to the alpha = 1.0 tree built first each round.
    extra_alphas = [-1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5,
                    1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]

    def rule_hits(sample, alpha):
        # Build a tree on the sample, then apply it to the full data set.
        built = dt.create_decision_tree(sample, schema, alpha, -1, True,
                                        obj_prob, z_beta)
        return dt.apply_rules(data, schema, built)

    hits_base = np.zeros(n_rows)
    hits_all = np.zeros(n_rows)
    rounds_done = 0

    # The limit is checked at the end of the body, so at least one round
    # always runs.
    # NOTE(review): with a non-negative integer ``nt`` this performs nt + 1
    # rounds - confirm that is intended.
    while True:
        sample = sampling(data, s_option)

        first = rule_hits(sample, 1.0)
        hits_base = hits_base + first
        hits_all = hits_all + first

        for alpha in extra_alphas:
            hits_all = hits_all + rule_hits(sample, alpha)

        rounds_done += 1
        if rounds_done > nt:
            break

    cov_c45 = float(np.sum(hits_base > 0)) / n_rows
    cov_leat = float(np.sum(hits_all > 0)) / n_rows
    return cov_c45, cov_leat