def crossData(data_list, alpha=0.0, rank_weight=False, stop_criterion_mis_rate=None,
              stop_criterion_min_node=1, stop_criterion_gain=0.0, prune_criteria=0):
    """Cross-dataset evaluation: train a decision tree on each dataset and
    test it on every other dataset in `data_list`.

    Parameters
    ----------
    data_list : iterable
        Dataset identifiers accepted by LogR.dataClean().
    alpha : float
        Smoothing/confidence parameter forwarded to DecisionTree.predict().
    rank_weight, prune_criteria :
        Accepted for interface compatibility; not used by this function
        (TODO confirm whether they were meant to be forwarded to the tree).
    stop_criterion_mis_rate, stop_criterion_min_node, stop_criterion_gain :
        Stopping criteria forwarded to DecisionTree.buildtree().

    Returns
    -------
    dict
        results[data_train][data_test] -> output of LogR.perfMeasure(...)
        for every ordered pair of distinct datasets.
    """
    results = {}
    for data_train in data_list:
        results[data_train] = {}
        # Train ONCE per training dataset. The original rebuilt the tree
        # inside the inner loop, repeating identical training for every
        # test dataset; the model depends only on the training data.
        x_train, y_tr = LogR.dataClean(data_train)
        y_train = label2Rank(y_tr.tolist())
        tree = DecisionTree().buildtree(
            x_train, y_train, weights=None,
            stop_criterion_mis_rate=stop_criterion_mis_rate,
            stop_criterion_min_node=stop_criterion_min_node,
            stop_criterion_gain=stop_criterion_gain)
        for data_test in data_list:
            if data_test == data_train:
                continue
            x_test, y_te = LogR.dataClean(data_test)
            y_test = label2Rank(y_te.tolist())
            y_pred = tree.predict(x_test, alpha)
            results[data_train][data_test] = LogR.perfMeasure(
                y_pred, y_test, rankopt=True)
    return results
if nocross: break for key in results.keys(): item = np.array(results[key]) mean = np.nanmean(item, axis=0) std = np.nanstd(item, axis=0) results[key] = [mean, std] return results if __name__ == "__main__": x,y = LogR.dataClean("data/posts_Feature_Emotion.txt") y = label2Rank(y) # x,y = dataSimulated(100,3,5) # for j in range(1,6): # stop_criterion_mis_rate = 0.22 - 0.04*j # for m in range(10): # ITER_MAX = 10 + m*10 result = crossValidate(x,y, nocross = False, iter_max=ITER_MAX, cost = cost) print result with open("result_boost.txt","a") as f: f.write("Nsamp: %d\n" % x.shape[0]) f.write("iter_max "+str(ITER_MAX)+"\n") f.write("stop misclassification rate %f\n" %stop_criterion_mis_rate) f.write("cost: AdaC2.M1\n") f.write("cost_level %s" % str(COST_LEVEL)) f.write(str(result)+"\n")