예제 #1
0
def crossData(data_list,
              alpha=0.0,
              rank_weight=False,
              stop_criterion_mis_rate=None,
              stop_criterion_min_node=1,
              stop_criterion_gain=0.0,
              prune_criteria=0):
    """Train on each dataset and evaluate on every other dataset.

    For every ordered pair of entries in ``data_list`` that do not compare
    equal, both entries are loaded with ``LogR.dataClean``, their labels
    are converted with ``label2Rank``, a ``DecisionTree`` is built on the
    training entry, and its predictions for the test entry are scored with
    ``LogR.perfMeasure(..., rankopt=True)``.

    Args:
        data_list: Collection of dataset identifiers accepted by
            ``LogR.dataClean``. It is iterated in two nested loops and its
            entries are used as dictionary keys.
        alpha: Passed as the second argument to ``tree.predict``.
        rank_weight: Accepted but not used by this function.
        stop_criterion_mis_rate: Forwarded to ``buildtree``.
        stop_criterion_min_node: Forwarded to ``buildtree``.
        stop_criterion_gain: Forwarded to ``buildtree``.
        prune_criteria: Accepted but not used by this function.

    Returns:
        A dict mapping each training entry to a dict that maps every other
        entry to the value returned by ``LogR.perfMeasure``.
    """
    def load_ranked(source):
        # Load one dataset and convert its label column to ranks.
        features, labels = LogR.dataClean(source)
        return features, label2Rank(labels.tolist())

    # The stopping options are identical for every tree, so collect them once.
    stop_options = dict(
        stop_criterion_mis_rate=stop_criterion_mis_rate,
        stop_criterion_min_node=stop_criterion_min_node,
        stop_criterion_gain=stop_criterion_gain,
    )

    results = {}
    for data_train in data_list:
        results[data_train] = {}
        for data_test in data_list:
            # A dataset is never evaluated against itself.
            if data_test == data_train:
                continue
            # NOTE(review): the training data is reloaded and the tree is
            # rebuilt for every test set. This is kept as-is because it is
            # not visible here whether buildtree/predict are deterministic
            # and free of side effects -- confirm before hoisting.
            x_train, y_train = load_ranked(data_train)
            x_test, y_test = load_ranked(data_test)
            tree = DecisionTree().buildtree(x_train,
                                            y_train,
                                            weights=None,
                                            **stop_options)
            predicted = tree.predict(x_test, alpha)
            score = LogR.perfMeasure(predicted, y_test, rankopt=True)
            results[data_train][data_test] = score
    return results
예제 #2
0
        if nocross:
            break

    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]

    return results


if __name__ == "__main__":
    # Script entry point: load the feature file, convert its labels with
    # label2Rank, run crossValidate, then print and log the outcome.
    x, y = LogR.dataClean("data/posts_Feature_Emotion.txt")
    y = label2Rank(y)
    # Disabled alternatives kept for reference: simulated input data, and a
    # sweep over stop_criterion_mis_rate / ITER_MAX.
    # x,y = dataSimulated(100,3,5)
    # for j in range(1,6):
    #     stop_criterion_mis_rate = 0.22 - 0.04*j
    #     for m in range(10):
    #         ITER_MAX = 10 + m*10
    # NOTE(review): ITER_MAX, cost, stop_criterion_mis_rate and COST_LEVEL are
    # not assigned in this block; presumably module-level settings -- confirm.
    result = crossValidate(x, y, nocross=False, iter_max=ITER_MAX, cost=cost)
    # Single-argument parenthesized form prints the same on Python 2 and 3.
    print(result)
    # Append mode: each run adds a record to the existing log file.
    with open("result_boost.txt", "a") as f:
        f.write("Nsamp: %d\n" % x.shape[0])
        f.write("iter_max " + str(ITER_MAX) + "\n")
        f.write("stop misclassification rate %f\n" % stop_criterion_mis_rate)
        f.write("cost: AdaC2.M1\n")
        # Terminate this record with a newline; without it the result written
        # next was glued onto the cost_level line.
        f.write("cost_level %s\n" % str(COST_LEVEL))
        f.write(str(result) + "\n")