Example #1
# Imports assumed by these examples; helpers such as KNNMallows, perfMeasure,
# DecisionTree, rank2Weight and logRegFeatureEmotion come from the surrounding project.
import numpy as np
from datetime import datetime
from sklearn import preprocessing
from sklearn.model_selection import KFold


def crossValidate(x, y, cv=5, K=None):
    """
    :param x: N*M feature matrix
    :param y: N*L ranking vectors
    :param K: neighbourhood size passed to KNNMallows
    :return: dict mapping each key to [mean, std] of the per-fold values
    """
    results = {"perf": []}

    ## cross validation ##
    np.random.seed(1100)
    kf = KFold(n_splits=cv, shuffle=True, random_state=0)
    for train, test in kf.split(x):
        x_train = x[train, :]
        y_train = y[train, :]
        x_test = x[test, :]
        y_test = y[test, :]

        y_pred = KNNMallows(K=K).fit(x_train, y_train).predict(x_test)
        # print y_pred ### test
        results["perf"].append(perfMeasure(y_pred, y_test, rankopt=True))
        # print results["perf"][-1]

    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]

    return results
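
The examples in this listing all share one pattern: split with KFold, score each held-out fold, then collapse the per-fold scores into [mean, std] with np.nanmean/np.nanstd. Below is a minimal, self-contained sketch of that pattern, using a stock scikit-learn estimator and plain accuracy as stand-ins for the project's KNNMallows and perfMeasure (all names and data are illustrative):

import numpy as np
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

def cross_validate_sketch(x, y, cv=5, k=5):
    # collect one score per fold, then reduce to [mean, std] like the examples above
    results = {"perf": []}
    kf = KFold(n_splits=cv, shuffle=True, random_state=0)
    for train, test in kf.split(x):
        model = KNeighborsClassifier(n_neighbors=k).fit(x[train], y[train])
        results["perf"].append(accuracy_score(y[test], model.predict(x[test])))
    for key in results:
        item = np.array(results[key])
        results[key] = [np.nanmean(item, axis=0), np.nanstd(item, axis=0)]
    return results

# illustrative call on random data
rng = np.random.RandomState(0)
print(cross_validate_sketch(rng.rand(100, 4), rng.randint(0, 3, size=100)))
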
def hyperParameter(x, y, x_valid=None, y_valid=None, cv=5, criteria=0):
    if x_valid is None:
        # no validation set, using cross validation #
        alpha_perform = []
        kf = KFold(n_splits=cv, shuffle=True, random_state=0)
        for train, valid in kf.split(x):
            x_train = x[train, :]
            y_train = y[train, :]
            x_valid = x[valid, :]
            y_valid = y[valid, :]

            tree = DecisionTree().buildtree(x_train, y_train)
            alpha_list = tree.alphalist()
            alpha_best = [-1, None]
            for alpha in alpha_list:
                y_pred = tree.predict(x_valid, alpha=alpha)
                perf = perfMeasure(y_pred, y_valid, rankopt=True)
                perf_criteria = perf[criteria]
                # keep the alpha with the best score on this validation fold
                if alpha_best[1] is None or perf_criteria >= alpha_best[1]:
                    alpha_best = [alpha, perf_criteria]

            alpha_perform.append(alpha_best)

        alpha_perform = np.array(alpha_perform, dtype=np.float32)
        print "inside hyperparameter:", alpha_perform  ### test
        return np.average(alpha_perform, axis=0)[0]

    else:
        tree = DecisionTree().buildtree(x, y)
        alpha_list = tree.alphalist()
        alpha_best = [-1, None]
        for alpha in alpha_list:
            y_pred = tree.predict(x_valid, alpha=alpha)
            perf = perfMeasure(y_pred, y_valid, rankopt=True)
            perf_criteria = perf[criteria]
            # keep the alpha with the best score on the validation set
            if alpha_best[1] is None or perf_criteria >= alpha_best[1]:
                alpha_best = [alpha, perf_criteria]
        return alpha_best[0]
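
hyperParameter enumerates the tree's candidate pruning strengths, scores each on a validation split, keeps the best, and (in the cross-validation branch) averages the winners across folds. The same select-the-best-alpha-on-validation idea can be sketched with scikit-learn's cost-complexity pruning, where cost_complexity_pruning_path plays the role of the project's DecisionTree.alphalist(); everything below is an illustrative stand-in, not the project's implementation:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

def select_alpha_sketch(x, y, random_state=0):
    # hold out a validation split and keep the pruning alpha with the best score
    # (ties go to the larger alpha, i.e. the more aggressively pruned tree)
    x_tr, x_va, y_tr, y_va = train_test_split(x, y, test_size=0.2, random_state=random_state)
    path = DecisionTreeClassifier(random_state=0).cost_complexity_pruning_path(x_tr, y_tr)
    best = [-1.0, None]  # [alpha, score], mirroring alpha_best above
    for alpha in path.ccp_alphas:
        tree = DecisionTreeClassifier(ccp_alpha=alpha, random_state=0).fit(x_tr, y_tr)
        score = accuracy_score(y_va, tree.predict(x_va))
        if best[1] is None or score >= best[1]:
            best = [alpha, score]
    return best[0]

# illustrative call on synthetic data
rng = np.random.RandomState(1)
x_demo = rng.rand(200, 5)
y_demo = (x_demo[:, 0] > 0.5).astype(int)
print(select_alpha_sketch(x_demo, y_demo))
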
def crossValidate(x, y, cv=5, alpha=0.0, rank_weight=False, stop_criterion_mis_rate=None,
                  stop_criterion_min_node=1, stop_criterion_gain=0.0, prune_criteria=0):

    results = {"alpha": [], "perf": []}

    # cross validation #
    np.random.seed(1100)
    kf = KFold(n_splits=cv, shuffle=True, random_state=0)  ## for testing fixing random_state
    for train, test in kf.split(x):
        x_train = x[train, :]
        y_train = y[train, :]
        x_test = x[test, :]
        y_test = y[test, :]

        # training and predict

        if alpha is None:
            ## nested select validate and test ##
            print("start searching alpha:", datetime.now())  ### test
            alpha_sel = hyperParameter(x_train, y_train, criteria=prune_criteria)
            print("finish searching alpha:", datetime.now(), alpha_sel)  ### test
        else:
            alpha_sel = alpha

        # weight #
        if rank_weight:
            weights = rank2Weight(y_train)
        else:
            weights = None

        tree = DecisionTree().buildtree(x_train, y_train, weights,
                                        stop_criterion_mis_rate=stop_criterion_mis_rate,
                                        stop_criterion_min_node=stop_criterion_min_node,
                                        stop_criterion_gain=stop_criterion_gain)
        # prune #
        alpha_list = tree.alphalist()  # candidate pruning strengths (not used further in this fold)
        # performance measure
        y_pred = tree.predict(x_test, alpha_sel)
        results["perf"].append(perfMeasure(y_pred, y_test, rankopt=True))
        results["alpha"].append(alpha_sel)
        print alpha_sel, "alpha"

    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]

    return results
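
Whichever variant is used, the returned dict maps each key to [mean, std] over the folds. A short usage sketch for the decision-tree variant above, assuming the project's helpers are importable and x, y are already loaded (N*M features, N*L rankings):

res = crossValidate(x, y, cv=5, alpha=None, rank_weight=False)
perf_mean, perf_std = res["perf"]     # per-metric means and stds across folds
alpha_mean, alpha_std = res["alpha"]  # average and spread of the selected pruning alpha
print("perf:", perf_mean, "+/-", perf_std)
print("alpha:", alpha_mean, "+/-", alpha_std)
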
def crossValidate(x, y, cv=5):
    results = {"perf": []}
    np.random.seed(1100)
    kf = KFold(n_splits=cv, shuffle=True, random_state=0)
    for train, test in kf.split(x):
        x_train = x[train, :]
        y_train = y[train, :]
        x_test = x[test, :]
        y_test = y[test, :]

        y_pred = LabelWiseRanking().fit(x_train, y_train).predict(x_test)
        results["perf"].append(perfMeasure(y_pred, y_test, rankopt=True))
    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]
    return results
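
LabelWiseRanking's internals are not shown in this listing. One common label-wise approach is to score every label independently and rank labels by the predicted scores; the sketch below illustrates that idea only and is an assumption, not the project's actual implementation:

import numpy as np
from sklearn.linear_model import LinearRegression

class LabelWiseRankingSketch:
    # fit one regressor per label column, then rank labels by predicted score
    def fit(self, x, y):
        self.models_ = [LinearRegression().fit(x, y[:, j]) for j in range(y.shape[1])]
        return self

    def predict(self, x):
        scores = np.column_stack([m.predict(x) for m in self.models_])
        order = np.argsort(-scores, axis=1)   # labels from best to worst
        return np.argsort(order, axis=1)      # rank position of each label

# illustrative call on random data
rng = np.random.RandomState(0)
x_demo, y_demo = rng.rand(50, 3), rng.rand(50, 4)
print(LabelWiseRankingSketch().fit(x_demo, y_demo).predict(x_demo[:2]))
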
def crossValidate(x,y, method = "dT",cv=5, alpha = None, min_node = 1):
    #  error measure
    results = []
    if method == "logReg":
        results = {"perf":[], "coef":[], "interc":[]}
    elif method == "dT":
        results = {"alpha": [], "perf":[]}

    # cross validation #
    np.random.seed(1100)
    kf = KFold(n_splits=cv, shuffle=True, random_state=0)  ## for testing fixing random_state
    for train, test in kf.split(x):
        x_train = x[train, :]
        y_train = y[train, :]
        x_test = x[test, :]
        y_test = y[test, :]

        # training and predict

        if alpha is None:
            ## nested select validate and test ##
            # print "start searching alpha:", datetime.now() ### test
            alpha_sel, perf = DTme.hyperParometer(x_train, y_train)
            # print "finish searching alpha:", datetime.now(), alpha ### test
        else:
            alpha_sel = alpha
        result = decisionTree(x_train, y_train, x_test, alpha=alpha_sel, min_node=min_node)

        # performance measure

        alpha_sel, y_pred = result
        results["perf"].append(perfMeasure(y_pred,y_test,rankopt=True))
        results["alpha"].append(alpha_sel)
        print alpha_sel, "alpha"

    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]

    return results
Example #6
def crossValidate(x, y, cv=5):

    results = {"perf": [], "coef": [], "interc": []}
    # cross validation #
    np.random.seed(1100)
    kf = KFold(n_splits=cv, shuffle=True,
               random_state=0)  ## for testing fixing random_state
    for train, test in kf.split(x):
        x_train = x[train, :]
        y_train = y[train, :]
        x_test = x[test, :]
        y_test = y[test, :]

        # from multilabel to multiclass based on independence assumption
        x_train, y_train = multiClass(x_train, y_train)

        # feature standardization #
        scaler = preprocessing.StandardScaler().fit(x_train)
        x_train = scaler.transform(x_train)
        x_test = scaler.transform(x_test)

        # training and predict
        result = logRegFeatureEmotion(x_train, y_train, x_test)

        # performance measure
        y_pred, coef, interc = result
        results["perf"].append(perfMeasure(y_pred, y_test))
        results["coef"].append(coef)
        results["interc"].append(interc)

    for key in results.keys():
        item = np.array(results[key])
        mean = np.nanmean(item, axis=0)
        std = np.nanstd(item, axis=0)
        results[key] = [mean, std]

    return results
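
Example #6 chains three steps per fold: convert the multilabel targets to multiclass (multiClass), standardize features with a scaler fitted on the training fold only, and fit a logistic regression. The standardization-plus-regression part can be reproduced with stock scikit-learn pieces; multiClass and perfMeasure are project-specific and omitted, and the data below is illustrative:

import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression

rng = np.random.RandomState(0)
x_train, y_train = rng.rand(80, 6), rng.randint(0, 3, size=80)
x_test = rng.rand(20, 6)

# fit the scaler on the training fold only, then apply it to both folds
scaler = preprocessing.StandardScaler().fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)

clf = LogisticRegression(max_iter=1000).fit(x_train_s, y_train)
y_pred = clf.predict(x_test_s)
coef, interc = clf.coef_, clf.intercept_  # analogous to the coef/interc collected above
print(y_pred[:5], coef.shape, interc.shape)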