Exemple #1
0
def leat_ai_raw(schema, train, test, s_option, nt, lift, z_beta):
    """Greedily add trees over a fixed alpha grid, keeping those that raise coverage.

    ``train`` and ``test`` are stacked into a single data set. One tree is
    built per alpha value on that full data set, and its predictions are
    added to the running prediction sum. A tree is kept only when it strictly
    increases coverage, i.e. the fraction of rows whose summed prediction is
    greater than zero.

    Args:
        schema: Passed through to ``dt.create_decision_tree`` and
            ``dt.apply_rules``.
        train: Rows stacked first by ``np.vstack``.
        test: Rows stacked after ``train``.
        s_option: Unused here; kept so the signature stays unchanged.
        nt: Unused here; the number of kept trees is tracked in a separate
            local counter. Kept so the signature stays unchanged.
        lift: Multiplier applied to the value returned by
            ``dt.laplace_smoothing(data)`` to obtain the objective
            probability handed to the tree builder.
        z_beta: Passed through to ``dt.create_decision_tree``.

    Returns:
        A list of ``[tree_count, coverage]`` pairs, one per kept tree, where
        ``tree_count`` starts at 1 and ``coverage`` is the coverage reached
        after adding that tree.
    """
    data = np.vstack((train, test))
    n_rows = len(data)
    obj_prob = lift * dt.laplace_smoothing(data)

    alpha_list = [-1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 1.0,
                  1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]

    pred = np.zeros(n_rows)
    # Coverage of the currently accepted prediction sum. It starts at 0.0
    # because ``pred`` is all zeros, and is updated only when a tree is kept,
    # so it never has to be recomputed from ``pred``.
    coverage = 0.0
    tree_count = 0
    output = []

    for alpha in alpha_list:
        tree = dt.create_decision_tree(data, schema, alpha, -1,
                                       True, obj_prob, z_beta)
        pred_added = pred + dt.apply_rules(data, schema, tree)
        cov_new = float(np.sum(pred_added > 0)) / n_rows

        # Keep the tree only if it covers rows that no earlier tree covered.
        if cov_new > coverage:
            pred = pred_added
            coverage = cov_new
            tree_count += 1
            output.append([tree_count, cov_new])
            # Single-argument print call: same text on Python 2 and 3, and it
            # reports the same pair that was just recorded in ``output``.
            print("%s %s" % (tree_count, cov_new))

    return output
Exemple #2
0
def leat(schema, train, test, s_option, nt, lift, z_beta):
    """Return coverage of bagged base trees and of bagged base+alpha-grid trees.

    ``train`` and ``test`` are stacked into one data set. For every bag a
    sample is drawn with ``sampling(data, s_option)``; a base tree
    (alpha = 1.0) and one tree per value of the alpha grid are built on that
    sample, and each tree's rules are applied to the full stacked data set.

    Args:
        schema: Passed through to ``dt.create_decision_tree`` and
            ``dt.apply_rules``.
        train: Rows stacked first by ``np.vstack``.
        test: Rows stacked after ``train``.
        s_option: Passed through to ``sampling``.
        nt: Bag limit. The loop body always runs once and stops as soon as
            the number of finished bags exceeds ``nt``.
        lift: Multiplier applied to the value returned by
            ``dt.laplace_smoothing(data)`` to obtain the objective
            probability handed to the tree builder.
        z_beta: Passed through to ``dt.create_decision_tree``.

    Returns:
        A tuple ``(cov_c45, cov_leat)``: the fraction of rows with a positive
        summed prediction using only the base trees, and using the base trees
        together with all alpha-grid trees.
    """
    data = np.vstack((train, test))
    n_rows = len(data)
    obj_prob = lift * dt.laplace_smoothing(data)

    # 1.0 is not part of this grid; it is the alpha of each bag's base tree.
    alpha_list = [-1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5,
                  1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]

    def rule_hits(sample, alpha):
        # Build a tree on the bagged sample, then score the full data set.
        fitted = dt.create_decision_tree(sample, schema, alpha, -1,
                                         True, obj_prob, z_beta)
        return dt.apply_rules(data, schema, fitted)

    def coverage(votes):
        # Fraction of rows whose summed prediction is greater than zero.
        return float(np.sum(votes > 0)) / n_rows

    base_votes = np.zeros(n_rows)   # base trees only
    all_votes = np.zeros(n_rows)    # base trees plus alpha-grid trees

    bags_done = 0
    finished = False
    # NOTE(review): the limit is checked after the increment, so a
    # non-negative integer nt yields nt + 1 bags -- confirm this is intended.
    while not finished:
        newdata = sampling(data, s_option)

        base_hits = rule_hits(newdata, 1.0)
        base_votes = base_votes + base_hits
        all_votes = all_votes + base_hits

        for alpha in alpha_list:
            all_votes = all_votes + rule_hits(newdata, alpha)

        bags_done += 1
        finished = bags_done > nt

    return coverage(base_votes), coverage(all_votes)