import numpy as np
from datetime import datetime
from sklearn import preprocessing
from sklearn.model_selection import KFold

# Project-local helpers (KNNMallows, perfMeasure, DecisionTree, rank2Weight,
# LabelWiseRanking, DTme, decisionTree, logRegFeatureEmotion, multiClass) are
# assumed importable from the surrounding package.


def crossValidate(x, y, cv=5, K=None):
    """
    Cross-validate the KNN-Mallows ranker.

    :param x: N*D feature matrix
    :param y: N*L ranking vectors
    :param cv: number of folds
    :param K: number of neighbours passed to KNNMallows
    :return: {"perf": [mean, std]} of perfMeasure over the folds
    """
    results = {"perf": []}

    ## cross validation ##
    np.random.seed(1100)
    kf = KFold(n_splits=cv, shuffle=True, random_state=0)
    for train, test in kf.split(x):
        x_train = x[train, :]
        y_train = y[train, :]
        x_test = x[test, :]
        y_test = y[test, :]
        y_pred = KNNMallows(K=K).fit(x_train, y_train).predict(x_test)
        # print(y_pred)  # test
        results["perf"].append(perfMeasure(y_pred, y_test, rankopt=True))
        # print(results["perf"][-1])

    # aggregate per-fold results into [mean, std]
    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]
    return results
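# A minimal usage sketch (illustrative, not from the source): it assumes x is
# an N*D feature matrix, each row of y is a permutation of L labels, and K=10
# is an arbitrary neighbourhood size. crossValidate is bound via a default
# argument because later definitions in this file reuse the name.
def _demo_knn_mallows_cv(cross_validate=crossValidate):
    rng = np.random.RandomState(0)
    x_demo = rng.rand(100, 5)                                    # N=100, D=5
    y_demo = np.array([rng.permutation(4) for _ in range(100)])  # L=4 ranking per row
    perf_mean, perf_std = cross_validate(x_demo, y_demo, cv=5, K=10)["perf"]
    print("mean perf:", perf_mean, "std:", perf_std)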
def hyperParameter(x, y, x_valid=None, y_valid=None, cv=5, criteria=0):
    """
    Select the pruning parameter alpha, either by cross validation
    (when no validation set is given) or on an explicit validation set.
    """
    if x_valid is None:
        # no validation set, using cross validation #
        alpha_perform = []
        kf = KFold(n_splits=cv, shuffle=True, random_state=0)
        for train, valid in kf.split(x):
            x_train = x[train, :]
            y_train = y[train, :]
            x_valid = x[valid, :]
            y_valid = y[valid, :]
            tree = DecisionTree().buildtree(x_train, y_train)
            alpha_list = tree.alphalist()
            alpha_best = [-1, None]
            for alpha in alpha_list:
                y_pred = tree.predict(x_valid, alpha=alpha)
                perf = perfMeasure(y_pred, y_valid, rankopt=True)
                perf_criteria = perf[criteria]
                # keep the alpha with the best value of the chosen criterion
                if alpha_best[1] is None or perf_criteria >= alpha_best[1]:
                    alpha_best[0] = alpha
                    alpha_best[1] = perf_criteria
            alpha_perform.append(alpha_best)
        alpha_perform = np.array(alpha_perform, dtype=np.float32)
        print("inside hyperparameter:", alpha_perform)  # test
        return np.average(alpha_perform, axis=0)[0]
    else:
        tree = DecisionTree().buildtree(x, y)
        alpha_list = tree.alphalist()
        alpha_best = [-1, None]
        for alpha in alpha_list:
            y_pred = tree.predict(x_valid, alpha=alpha)
            perf = perfMeasure(y_pred, y_valid, rankopt=True)
            perf_criteria = perf[criteria]
            if alpha_best[1] is None or perf_criteria >= alpha_best[1]:
                alpha_best[0] = alpha
                alpha_best[1] = perf_criteria
        return alpha_best[0]
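# Sketch of the explicit-validation-set path of hyperParameter above; the
# 80/20 split below is an illustrative assumption (train_test_split is
# standard scikit-learn), not part of the original code.
def _demo_alpha_on_validation_split(x, y):
    from sklearn.model_selection import train_test_split
    x_tr, x_va, y_tr, y_va = train_test_split(x, y, test_size=0.2, random_state=0)
    # criteria=0 selects the first entry of perfMeasure as the selection score
    return hyperParameter(x_tr, y_tr, x_valid=x_va, y_valid=y_va, criteria=0)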
def crossValidate(x, y, cv=5, alpha=0.0, rank_weight=False,
                  stop_criterion_mis_rate=None, stop_criterion_min_node=1,
                  stop_criterion_gain=0.0, prune_criteria=0):
    results = {"alpha": [], "perf": []}

    # cross validation #
    np.random.seed(1100)
    kf = KFold(n_splits=cv, shuffle=True, random_state=0)  # fixed random_state for testing
    for train, test in kf.split(x):
        x_train = x[train, :]
        y_train = y[train, :]
        x_test = x[test, :]
        y_test = y[test, :]

        # training and predict
        if alpha is None:
            ## nested select validate and test ##
            print("start searching alpha:", datetime.now())  # test
            alpha_sel = hyperParameter(x_train, y_train, criteria=prune_criteria)
            print("finish searching alpha:", datetime.now(), alpha_sel)  # test
        else:
            alpha_sel = alpha

        # weight #
        if rank_weight:
            weights = rank2Weight(y_train)
        else:
            weights = None

        tree = DecisionTree().buildtree(x_train, y_train, weights,
                                        stop_criterion_mis_rate=stop_criterion_mis_rate,
                                        stop_criterion_min_node=stop_criterion_min_node,
                                        stop_criterion_gain=stop_criterion_gain)

        # prune #
        # alpha_list = tree.alphalist()

        # performance measure
        y_pred = tree.predict(x_test, alpha_sel)
        results["perf"].append(perfMeasure(y_pred, y_test, rankopt=True))
        results["alpha"].append(alpha_sel)
        print(alpha_sel, "alpha")

    # aggregate per-fold results into [mean, std]
    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]
    return results
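# Sketch contrasting the two alpha modes of the tree-based crossValidate
# above: a fixed pruning parameter versus the nested search via
# hyperParameter (alpha=None). Bound via a default argument because later
# definitions in this file reuse the name crossValidate.
def _demo_tree_cv(x, y, cross_validate=crossValidate):
    fixed = cross_validate(x, y, cv=5, alpha=0.0, rank_weight=True)
    nested = cross_validate(x, y, cv=5, alpha=None, prune_criteria=0)
    return fixed["perf"], nested["perf"], nested["alpha"]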
def crossValidate(x, y, cv=5):
    results = {"perf": []}
    np.random.seed(1100)
    kf = KFold(n_splits=cv, shuffle=True, random_state=0)
    for train, test in kf.split(x):
        x_train = x[train, :]
        y_train = y[train, :]
        x_test = x[test, :]
        y_test = y[test, :]
        y_pred = LabelWiseRanking().fit(x_train, y_train).predict(x_test)
        results["perf"].append(perfMeasure(y_pred, y_test, rankopt=True))
    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]
    return results
def crossValidate(x, y, method="dT", cv=5, alpha=None, min_node=1):
    # error measure
    results = []
    if method == "logReg":
        results = {"perf": [], "coef": [], "interc": []}
    elif method == "dT":
        results = {"alpha": [], "perf": []}

    # cross validation #
    np.random.seed(1100)
    kf = KFold(n_splits=cv, shuffle=True, random_state=0)  # fixed random_state for testing
    for train, test in kf.split(x):
        x_train = x[train, :]
        y_train = y[train, :]
        x_test = x[test, :]
        y_test = y[test, :]

        # training and predict
        if alpha is None:
            ## nested select validate and test ##
            # print("start searching alpha:", datetime.now())  # test
            alpha_sel, perf = DTme.hyperParometer(x_train, y_train)
            # print("finish searching alpha:", datetime.now(), alpha_sel)  # test
        else:
            alpha_sel = alpha
        result = decisionTree(x_train, y_train, x_test, alpha=alpha_sel, min_node=min_node)

        # performance measure
        alpha_sel, y_pred = result
        results["perf"].append(perfMeasure(y_pred, y_test, rankopt=True))
        results["alpha"].append(alpha_sel)
        print(alpha_sel, "alpha")

    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]
    return results
def crossValidate(x, y, cv=5):
    results = {"perf": [], "coef": [], "interc": []}

    # cross validation #
    np.random.seed(1100)
    kf = KFold(n_splits=cv, shuffle=True, random_state=0)  # fixed random_state for testing
    for train, test in kf.split(x):
        x_train = x[train, :]
        y_train = y[train, :]
        x_test = x[test, :]
        y_test = y[test, :]

        # from multilabel to multiclass based on independence assumption
        x_train, y_train = multiClass(x_train, y_train)

        # feature standardization #
        scaler = preprocessing.StandardScaler().fit(x_train)
        x_train = scaler.transform(x_train)
        x_test = scaler.transform(x_test)

        # training and predict
        result = logRegFeatureEmotion(x_train, y_train, x_test)

        # performance measure
        y_pred, coef, interc = result
        results["perf"].append(perfMeasure(y_pred, y_test))
        results["coef"].append(coef)
        results["interc"].append(interc)

    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]
    return results
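# Sketch of unpacking the log-reg variant's aggregated output: every entry of
# the returned dict is a [mean, std] pair over the folds (an illustration of
# the aggregation loop above, not an addition to it).
def _demo_logreg_cv(x, y, cross_validate=crossValidate):
    results = cross_validate(x, y, cv=5)
    perf_mean, perf_std = results["perf"]
    coef_mean, coef_std = results["coef"]
    interc_mean, interc_std = results["interc"]
    return perf_mean, coef_mean, interc_mean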