Example #1
0
def test_mae():
    """Check ``cl.mae_from_distribution`` on every two-pattern decision vector.

    Two weight tables are fixed and each of the four possible 0/1 decision
    vectors is evaluated.  For every pattern the expected contribution is the
    ``w1`` entry when the decision is 0 and the ``w0`` entry when it is 1; the
    expected MAE is the sum of those contributions.
    """
    weights0 = np.array([0.5, 0.125])
    weights1 = np.array([0.125, (1 - 0.5 - 2 * 0.125)])

    decisions = (
        np.array([0, 0]),
        np.array([0, 1]),
        np.array([1, 0]),
        np.array([1, 1]),
    )

    for decision in decisions:
        observed = cl.mae_from_distribution(decision, weights0, weights1)
        # Pick, per pattern, the weight that the decision gets wrong.
        expected = np.where(decision == 0, weights1, weights0).sum()
        nt.assert_almost_equal(observed, expected)
def main(xpl_data, num_features=None, num_iterations=None, save_todir=None):
    """Run the iterative window-selection loop and log the errors to disk.

    For every iteration a window is selected with ``ft.cmim`` and written out
    as ``window_<i>.win`` and ``window_<i>.png``; the frequency tables are
    normalized and updated through ``cl.apply_feature_selection``; a
    ``mtm_<i>`` minterm file is written; and the classification error plus the
    MAE of the hypothesis combined by ``cl.adaboost_decision`` are appended to
    ``Error.txt``.  After the loop the final MAE is written to the same file
    and the per-iteration MAE values are passed to ``gra.plot_MAE_iter``.

    Args:
        xpl_data: object exposing ``data``, ``freq0``, ``freq1`` and
            ``winshape``.
        num_features: forwarded unchanged to ``ft.cmim``.
        num_iterations: number of iterations to run.  Required, despite the
            ``None`` default kept for interface compatibility.
        save_todir: string prepended verbatim to every output file name, so it
            must already end with a path separator when it names a directory.
            Required, despite the ``None`` default.

    Raises:
        TypeError: if ``num_iterations`` or ``save_todir`` is ``None``.
    """
    # Fail before touching the file system; otherwise a missing
    # ``num_iterations`` would leave an empty Error.txt behind.
    if num_iterations is None:
        raise TypeError("num_iterations must be an integer, not None")
    if save_todir is None:
        raise TypeError("save_todir must be a path prefix string, not None")

    data = xpl_data.data
    # Work on copies: the original frequencies are still needed to estimate
    # the training error of the combined hypothesis.
    w0 = xpl_data.freq0.copy()
    w1 = xpl_data.freq1.copy()
    w0_train, w1_train = cl.normalize_table(xpl_data.freq0, xpl_data.freq1)

    mae_list = []
    GVector = []
    # DEC is the decision table: one column per iteration.  Column 0 is a
    # zero placeholder from this initialization and is dropped before use.
    DEC = np.zeros(w0.shape)

    # The context manager guarantees the report is closed even when an
    # iteration raises.
    with open(save_todir + "Error.txt", "w") as report:
        for i in range(num_iterations):
            indices, _, _ = ft.cmim(data, w0, w1, num_features)
            window_base = save_todir + "window_" + str(i)
            tw.to_window_file(indices, xpl_data.winshape, window_base + ".win")
            tw.to_image_file(indices, xpl_data.winshape, window_base + ".png", scale=8)

            w0, w1 = cl.normalize_table(w0, w1)
            w0, w1, updated_decision, cls_error = cl.apply_feature_selection(data, indices, w0, w1)
            unique_array, unique_index = cl._apply_projection(data, indices)
            xplutil.write_minterm_file(
                save_todir + "mtm_" + str(i),
                indices,
                xpl_data.winshape,
                unique_array,
                updated_decision[unique_index],
            )
            report.write("Classification error for iteration " + str(i) + " = " + str(cls_error) + ".\n")

            bt = cl.beta_factor(cls_error)
            GVector = np.append(GVector, np.log(1/bt))

            DEC = np.column_stack((DEC, updated_decision))
            hypothesis = cl.adaboost_decision(np.delete(DEC, 0, axis=1), GVector)
            MAE_t = cl.mae_from_distribution(hypothesis, w0_train, w1_train)
            mae_list = np.append(mae_list, MAE_t)
            report.write("MAE for iteration " + str(i) + " = " + str(MAE_t) + ".\n\n")

        # Drop the zero placeholder column before combining all iterations.
        DEC = np.delete(DEC, 0, axis=1)
        hypothesis = cl.adaboost_decision(DEC, GVector)
        MAE = cl.mae_from_distribution(hypothesis, w0_train, w1_train)
        report.write("Final MAE = " + str(MAE))

    gra.plot_MAE_iter(np.array(range(num_iterations)), np.array(mae_list))
Example #3
0
def train(xpl_data, n_features, n_iterations, dirpath):
    """Train an ensemble of windows and log the training MAE per iteration.

    For every iteration a window is selected with ``ft.cmim`` (indices are
    sorted before use) and written out as ``window_<i>.win`` and
    ``window_<i>.png``; the frequency tables are rescaled to sum to one and
    updated through ``clf.apply_feature_selection``; a ``mtm<i>`` minterm file
    is written; and the MAE of the hypothesis combined by
    ``clf.adaboost_decision`` is appended to ``MAE_training.txt`` as an
    ``"<i>, <MAE>"`` line.  After the loop the MAE curve is passed to
    ``plot_MAE``.

    Args:
        xpl_data: object exposing ``data``, ``windata``, ``freq0``, ``freq1``
            and ``winshape``.
        n_features: forwarded unchanged to ``ft.cmim``.
        n_iterations: number of iterations to run.
        dirpath: string prepended verbatim to every output file name, so it
            must already end with a path separator when it names a directory.

    Returns:
        ``Ensemble(xpl_data, win, n_features, n_iterations, error_list,
        mae_list, dirpath)`` where ``error_list`` holds the per-iteration
        classification errors and ``mae_list`` the per-iteration MAE values.
    """
    Xdata = xpl_data.data
    win = xpl_data.windata
    # Work on copies: the original frequencies are still needed to estimate
    # the training error of the combined hypothesis.
    w0 = xpl_data.freq0.copy()
    w1 = xpl_data.freq1.copy()
    error_list = []
    mae_list = []
    GVector = []
    # DEC is the decision table: one column per iteration.  Column 0 is a
    # zero placeholder from this initialization and is dropped before use.
    DEC = np.zeros(w0.shape)
    total = float(np.sum([w0, w1]))
    w0_train = w0/total
    w1_train = w1/total

    # The context manager guarantees the log is closed even when an
    # iteration raises.
    with open(dirpath + "MAE_training.txt", "w") as log:
        for i in range(n_iterations):
            indices, _, _ = ft.cmim(Xdata, w0, w1, n_features)
            indices = np.sort(indices)
            window_base = dirpath + "window_" + str(i)
            triosw.to_window_file(indices, xpl_data.winshape, window_base + ".win")
            triosw.to_image_file(indices, xpl_data.winshape, window_base + ".png", scale=8)

            # Rescale the current tables so that together they sum to one.
            total = float(np.sum([w0, w1]))
            w0 = w0/total
            w1 = w1/total
            w0, w1, updated_decision, cls_error = clf.apply_feature_selection(Xdata, indices, w0, w1)
            unique_array, unique_index = clf._apply_projection(Xdata, indices)
            xplutil.write_minterm_file(
                dirpath + "mtm" + str(i),
                indices,
                xpl_data.winshape,
                unique_array,
                updated_decision[unique_index],
            )
            error_list.append(cls_error)

            bt = clf.beta_factor(cls_error)
            GVector = np.append(GVector, np.log(1/bt))

            DEC = np.column_stack((DEC, updated_decision))
            hypothesis = clf.adaboost_decision(np.delete(DEC, 0, axis=1), GVector)
            MAE_t = clf.mae_from_distribution(hypothesis, w0_train, w1_train)
            mae_list = np.append(mae_list, MAE_t)
            log.write(str(i) + ", " + str(MAE_t) + "\n")

    # The combined hypothesis over all iterations equals the one computed in
    # the last loop pass and nothing consumes it afterwards, so it is not
    # recomputed here.
    plot_MAE(np.array(range(n_iterations)), np.array(mae_list), dirpath)
    return Ensemble(xpl_data, win, n_features, n_iterations, error_list, mae_list,dirpath)