Esempio n. 1
0
def bootstrap():
    """Fit a GA-optimised linear ranking model per dataset and print its FPA.

    For every ``(dataset, filename)`` pair yielded by
    ``Processing().import_single_data()``:

    * the dataset is split with ``Processing().separate_data``;
    * ``pyGaft`` is run with the ``LTR`` objective on the training part
      (maximisation, 20 variables bounded to ``[-2, 2]``);
    * the coefficient vector is read from the last entry of
      ``best_fit.best_fit`` after reloading that module (the GA is presumably
      what rewrites it -- confirm against ``pyGaft``);
    * the dot product of each test row with that vector is scored with
      ``PerformanceMeasure(...).FPA()`` and printed.

    Nothing is returned.
    """
    for dataset, filename in Processing().import_single_data():
        print(filename)
        split = Processing().separate_data(dataset)
        train_X, train_y, test_X, test_y = split

        # The optimiser is given plain Python lists instead of arrays.
        train_X_list = train_X.tolist()
        train_y_list = train_y.tolist()

        from PyOptimize.General_Opt import Test_function

        # Objective handed to the GA; it only delegates to Test_function.
        def LTR(a, **kwargs):
            return Test_function().LTR(a, **kwargs)

        optimiser = pyGaft(objfunc=LTR,
                           var_bounds=[(-2, 2)] * 20,
                           individual_size=50,
                           max_iter=10,
                           max_or_min='max',
                           X=train_X_list,
                           y=train_y_list)
        optimiser.run()

        # Re-execute best_fit so its contents reflect the run just finished.
        importlib.reload(best_fit)
        newest = best_fit.best_fit[-1]
        a, fitness_value = newest[-2], newest[-1]
        print('a = {0}'.format(a))

        # Linear scoring: one dot product per test sample.
        pred_y = [np.dot(test_x, a) for test_x in test_X]

        fpa = PerformanceMeasure(test_y, pred_y).FPA()
        print('fpa = {0}'.format(fpa))
Esempio n. 2
0
def bootstrap():
    """Compare a GA-optimised cost-sensitive RankSVM with a plain RankSVM.

    For every ``(dataset, filename)`` pair yielded by
    ``Processing().import_single_data()`` the data is split, a weight vector
    is optimised by ``pyGaft`` with the ``Loss`` objective, and the FPA of the
    resulting ``RankSVM(w=w).predict3`` predictions is printed as ``rs_fpa``.
    A ``RankSVM`` fitted directly on the shuffled training data is then scored
    the same way and printed as ``rs_fpa2``.

    Nothing is returned; all output goes to stdout.
    """
    datasets = Processing().import_single_data()
    for count, (dataset, filename) in enumerate(datasets, start=1):
        print(filename)
        split = Processing().separate_data(dataset)
        train_X, train_y, test_X, test_y = split

        # 1. Sort the training set in descending order (per the original
        #    note this is already done inside Processing).

        # 2. Use transform_pairwise() to obtain P and r.
        #    P is a matrix whose rows are differences of two x vectors;
        #    r is a vector which, because the data is sorted, is all ones.
        pairwise = RankSVM()
        pair_diffs, pair_labels = pairwise.transform_pairwise(train_X, train_y)
        pair_diffs = pair_diffs.tolist()
        pair_labels = pair_labels.tolist()
        print('type of P ', type(pair_diffs[0][0]), 'type of r ',
              type(pair_labels[0]))

        # 3. Compute u and n from the training labels.
        u, n = PerformanceMeasure(train_y).calc_UN(type='cs')
        print(type(u[0]), type(n[0]))

        # 4. Feed P, r, u and n into the genetic algorithm to compute w.
        from PyOptimize.General_Opt import Test_function

        # Objective handed to the GA; it only delegates to Test_function.
        def Loss(x, **kwargs):
            return Test_function().Loss(x, **kwargs)

        optimiser = pyGaft(objfunc=Loss,
                           var_bounds=[(-2, 2)] * 20,
                           individual_size=50,
                           max_iter=200,
                           max_or_min='min',
                           P=pair_diffs,
                           r=pair_labels,
                           u=u,
                           n=n)
        optimiser.run()

        # 5. Predict with predict3; w is taken from best_fit.
        #    First iteration imports the module, later ones reload it.
        if count == 1:
            import best_fit
        else:
            importlib.reload(best_fit)
        newest = best_fit.best_fit[-1]
        w, fitness = newest[-2], newest[-1]
        print('w = ', w)
        ga_pred = np.around(RankSVM(w=w).predict3(test_X))
        rs_fpa = PerformanceMeasure(test_y, ga_pred).FPA()
        print('rs_fpa:', rs_fpa)

        # Plain RankSVM for comparison.
        from sklearn.utils import shuffle
        X_shuf, y_shuf = shuffle(train_X, train_y)
        plain_model = RankSVM().fit(X_shuf, y_shuf)
        plain_pred = np.around(plain_model.predict2(test_X))
        rs_fpa2 = PerformanceMeasure(test_y, plain_pred).FPA()
        # AAE is computed as in the original although its value is not used.
        rs_aae_result = PerformanceMeasure(test_y, plain_pred).AAE()
        print('rs_fpa2:', rs_fpa2)
Esempio n. 3
0
def bootstrap(dataset):
    """Run one bootstrap round on ``dataset`` and record every model's scores.

    ``dataset`` is split with ``Processing().separate_data``. The models below
    are trained on the training part and evaluated on the testing part, in
    this order:

    1. three GA-optimised ranking SVMs (``Loss`` objective) using the
       ``'cs'``, ``'ir'`` and ``'svm'`` modes of ``calc_UN``;
    2. on the original training data: GA-optimised LTR, RankSVM,
       ``DecisionTreeRegressor``, ``LinearRegression``, ``BayesianRidge``;
    3. the same five models on data resampled by ``RandomUnderSampler``;
    4. the same five models on data resampled by ``Smote``.

    For each model FPA, PofB20, PofD20 and ranking are appended to the
    module-level ``<model>_fpa_list``, ``<model>_pofb_list``,
    ``<model>_pofd_list`` and ``<model>_ranking_list``.

    Args:
        dataset: data set accepted by ``Processing().separate_data``.

    Returns:
        None. Results are accumulated in the module-level lists.
    """
    import sys

    training_data_X, training_data_y, testing_data_X, testing_data_y = (
        Processing().separate_data(dataset))

    def _record(pred_y, fpa_list, pofb_list, pofd_list, ranking_list):
        # Score pred_y against the test labels and append one value per list.
        fpa_list.append(PerformanceMeasure(testing_data_y, pred_y).FPA())
        pofb = PerformanceMeasure(testing_data_y, pred_y).PofB20()
        pofd = PerformanceMeasure(testing_data_y, pred_y).PofD20()
        ranking = PerformanceMeasure(testing_data_y, pred_y).ranking()
        pofb_list.append(pofb)
        pofd_list.append(pofd)
        ranking_list.append(ranking)

    def _latest_solution():
        # Return the solution vector stored in the last best_fit entry.
        # A plain ``import`` of an already-imported module returns the cached
        # module object, so it would expose the previous GA run's result.
        # Reload whenever the module is already loaded (including from an
        # earlier call of bootstrap) and import it only the very first time.
        loaded = sys.modules.get('best_fit')
        if loaded is None:
            import best_fit as loaded
        else:
            loaded = importlib.reload(loaded)
        return loaded.best_fit[-1][-2]

    def _ga_ranksvm_predict(un_type):
        # Optimise w with the Loss objective for one calc_UN mode, then
        # predict the test set with RankSVM(w=w).predict3.
        u, n = PerformanceMeasure(training_data_y).calc_UN(type=un_type)
        # 20 decision variables -- presumably the feature count; TODO confirm.
        pyGaft(objfunc=Loss,
               var_bounds=[(-1, 1)] * 20,
               individual_size=500,
               max_iter=2,
               max_or_min='min',
               P=P,
               r=r,
               u=u,
               n=n).run()
        return RankSVM(w=_latest_solution()).predict3(testing_data_X)

    def _ga_ltr_predict(train_X, train_y):
        # Optimise LTR coefficients on (train_X, train_y) and score each test
        # row with a dot product.
        X_list = train_X.tolist()
        y_list = train_y.tolist()
        pyGaft(objfunc=LTR,
               var_bounds=[(-20, 20)] * 20,
               individual_size=100,
               max_iter=2,
               max_or_min='max',
               X=X_list,
               y=y_list).run()
        coef = _latest_solution()
        return [np.dot(test_x, coef) for test_x in testing_data_X]

    def _ranksvm_predict(train_X, train_y):
        # Shuffle so that r is not all ones; per the original note the SVM
        # raises an error when every pairwise label is 1.
        shuf_X, shuf_y = shuffle(train_X, train_y)
        return RankSVM(C=1.0).fit(shuf_X, shuf_y).predict2(testing_data_X)

    def _regress_predict(model, train_X, train_y):
        # Fit a scikit-learn style regressor and predict the test set.
        return model.fit(train_X, train_y).predict(testing_data_X)

    # Pairwise transform shared by the three GA-optimised ranking SVMs.
    P, r = RankSVM().transform_pairwise(training_data_X, training_data_y)
    P = P.tolist()
    r = r.tolist()

    # Cost-sensitive ranking SVM.
    _record(_ga_ranksvm_predict('cs'), csrs_fpa_list, csrs_pofb_list,
            csrs_pofd_list, csrs_ranking_list)

    # IR SVM.
    _record(_ga_ranksvm_predict('ir'), irsvm_fpa_list, irsvm_pofb_list,
            irsvm_pofd_list, irsvm_ranking_list)

    # TODO (from the original): still need to add a variant that drops the
    # other parameter.
    _record(_ga_ranksvm_predict('svm'), svm_fpa_list, svm_pofb_list,
            svm_pofd_list, svm_ranking_list)

    # Kept from the original; the name is not referenced in this function.
    from PyOptimize.General_Opt import Test_function

    # --- Models trained on the original training set --------------------
    # LTR.
    _record(_ga_ltr_predict(training_data_X, training_data_y), ltr_fpa_list,
            ltr_pofb_list, ltr_pofd_list, ltr_ranking_list)

    # Ranking SVM. Only this variant rounds its predictions; the RUS and
    # Smote variants below do not (kept as in the original).
    _record(np.around(_ranksvm_predict(training_data_X, training_data_y)),
            rs_fpa_list, rs_pofb_list, rs_pofd_list, rs_ranking_list)

    _record(_regress_predict(DecisionTreeRegressor(), training_data_X,
                             training_data_y),
            dtr_fpa_list, dtr_pofb_list, dtr_pofd_list, dtr_ranking_list)

    _record(_regress_predict(linear_model.LinearRegression(), training_data_X,
                             training_data_y),
            lr_fpa_list, lr_pofb_list, lr_pofd_list, lr_ranking_list)

    _record(_regress_predict(BayesianRidge(), training_data_X,
                             training_data_y),
            brr_fpa_list, brr_pofb_list, brr_pofd_list, brr_ranking_list)

    # --- Apply RUS to the training set first, then train the models -----
    rus_X, rus_y, _id = RandomUnderSampler(ratio=1.0,
                                           return_indices=True).fit_sample(
                                               training_data_X,
                                               training_data_y)

    _record(_ga_ltr_predict(rus_X, rus_y), rus_ltr_fpa_list,
            rus_ltr_pofb_list, rus_ltr_pofd_list, rus_ltr_ranking_list)

    _record(_ranksvm_predict(rus_X, rus_y), rus_rs_fpa_list,
            rus_rs_pofb_list, rus_rs_pofd_list, rus_rs_ranking_list)

    _record(_regress_predict(DecisionTreeRegressor(), rus_X, rus_y),
            rus_dtr_fpa_list, rus_dtr_pofb_list, rus_dtr_pofd_list,
            rus_dtr_ranking_list)

    _record(_regress_predict(linear_model.LinearRegression(), rus_X, rus_y),
            rus_lr_fpa_list, rus_lr_pofb_list, rus_lr_pofd_list,
            rus_lr_ranking_list)

    _record(_regress_predict(BayesianRidge(), rus_X, rus_y),
            rus_brr_fpa_list, rus_brr_pofb_list, rus_brr_pofd_list,
            rus_brr_ranking_list)

    # --- Apply Smote to the training set first, then train the models ---
    smote_X, smote_y = Smote(training_data_X, training_data_y, ratio=1.0,
                             k=5).over_sampling_addorginaldata()

    _record(_ga_ltr_predict(smote_X, smote_y), smote_ltr_fpa_list,
            smote_ltr_pofb_list, smote_ltr_pofd_list, smote_ltr_ranking_list)

    _record(_ranksvm_predict(smote_X, smote_y), smote_rs_fpa_list,
            smote_rs_pofb_list, smote_rs_pofd_list, smote_rs_ranking_list)

    _record(_regress_predict(DecisionTreeRegressor(), smote_X, smote_y),
            smote_dtr_fpa_list, smote_dtr_pofb_list, smote_dtr_pofd_list,
            smote_dtr_ranking_list)

    # NOTE(review): the original appended the undefined name
    # ``smote_lr_rankig`` to ``smote_lr_rankig_list``. Both are spelled
    # ``ranking`` here to match every other model; confirm the module-level
    # list is named ``smote_lr_ranking_list``.
    _record(_regress_predict(linear_model.LinearRegression(), smote_X,
                             smote_y),
            smote_lr_fpa_list, smote_lr_pofb_list, smote_lr_pofd_list,
            smote_lr_ranking_list)

    _record(_regress_predict(BayesianRidge(), smote_X, smote_y),
            smote_brr_fpa_list, smote_brr_pofb_list, smote_brr_pofd_list,
            smote_brr_ranking_list)
Esempio n. 4
0
def bootstrap(dataset):
    """Score the cost-sensitive ranking SVM on several feature subsets.

    ``dataset`` is split with ``Processing().separate_data``; feature subsets
    of size 2, 3, 5, 8 and 13 are then obtained from
    ``IG(...).getSelectedFeature(k)``. For each subset a weight vector is
    optimised by ``pyGaft`` with the ``Loss`` objective (``'cs'`` mode of
    ``calc_UN``), read back from the reloaded ``best_fit`` module, and the FPA
    of ``RankSVM(w=w).predict3`` on the subset's test data is appended to the
    module-level ``csrs_fpa_list<k>``.

    Args:
        dataset: data set accepted by ``Processing().separate_data``.

    Returns:
        None. Results are accumulated in the module-level lists.
    """
    training_data_X, training_data_y, testing_data_X, testing_data_y = (
        Processing().separate_data(dataset))

    def _select(k):
        # Train/test split restricted to the features chosen for size k.
        tr_X, tr_y, te_X, te_y = IG(training_data_X, training_data_y,
                                    testing_data_X,
                                    testing_data_y).getSelectedFeature(k)
        return tr_X, tr_y, te_X, te_y

    # All selections are computed up front, before any optimisation runs.
    subsets = {k: _select(k) for k in (2, 3, 5, 8, 13)}

    def _csrs_fpa(k):
        # GA-optimise w on the k-feature subset and return the test FPA.
        tr_X, tr_y, te_X, te_y = subsets[k]
        ranker = RankSVM()
        P, r = ranker.transform_pairwise(tr_X, tr_y)
        P = P.tolist()
        r = r.tolist()
        u, n = PerformanceMeasure(tr_y).calc_UN(type='cs')

        # One decision variable per selected feature.
        optimiser = pyGaft(objfunc=Loss,
                           var_bounds=[(-1, 1)] * k,
                           individual_size=500,
                           max_iter=200,
                           max_or_min='min',
                           P=P,
                           r=r,
                           u=u,
                           n=n)
        optimiser.run()

        # Re-execute best_fit so it reflects the run that just finished.
        importlib.reload(best_fit)
        newest = best_fit.best_fit[-1]
        w, fitness = newest[-2], newest[-1]
        predictions = RankSVM(w=w).predict3(te_X)
        return PerformanceMeasure(te_y, predictions).FPA()

    # cost sensitive ranking SVM with the number of features as 2
    fpa_2 = _csrs_fpa(2)
    csrs_fpa_list2.append(fpa_2)

    # cost sensitive ranking SVM with the number of features as 3
    first = False
    fpa_3 = _csrs_fpa(3)
    csrs_fpa_list3.append(fpa_3)

    # cost sensitive ranking SVM with the number of features as 5
    fpa_5 = _csrs_fpa(5)
    csrs_fpa_list5.append(fpa_5)

    # cost sensitive ranking SVM with the number of features as 8
    fpa_8 = _csrs_fpa(8)
    csrs_fpa_list8.append(fpa_8)

    # cost sensitive ranking SVM with the number of features as 13
    fpa_13 = _csrs_fpa(13)
    csrs_fpa_list13.append(fpa_13)
    print(f'first={first}')
Esempio n. 5
0
def bootstrap(training_data_X, training_data_y, testing_data_X, testing_data_y,
              dataset, trainingfilename, testingfilename):
    """Train every model on one train/test pair and record FPA and PofB20.

    Models, in execution order: three GA-optimised ranking SVMs (``Loss``
    objective with the ``'cs'``, ``'ir'`` and ``'svm'`` modes of ``calc_UN``),
    GA-optimised LTR, RankSVM, ``DecisionTreeRegressor``,
    ``LinearRegression`` and ``BayesianRidge``. For each model the FPA and
    PofB20 on the test data are appended to the module-level
    ``<model>_fpa_list`` and ``<model>_pofb_list``.

    Args:
        training_data_X: training features; must provide ``tolist()``.
        training_data_y: training targets; must provide ``tolist()``.
        testing_data_X: test features, iterated row by row for LTR scoring.
        testing_data_y: test targets passed to ``PerformanceMeasure``.
        dataset: not used in this function.
        trainingfilename: appended to the module-level ``trainingdataname``
            list and printed.
        testingfilename: not used in this function.

    Returns:
        None. Results are accumulated in the module-level lists.
    """
    import sys

    trainingdataname.append(trainingfilename)
    print('trainingdata', trainingfilename)

    def _record(pred_y, fpa_list, pofb_list):
        # Score pred_y against the test labels and append both metrics.
        fpa = PerformanceMeasure(testing_data_y, pred_y).FPA()
        pofb = PerformanceMeasure(testing_data_y, pred_y).PofB20()
        fpa_list.append(fpa)
        pofb_list.append(pofb)

    def _latest_solution():
        # Return the solution vector stored in the last best_fit entry.
        # A plain ``import`` of an already-imported module returns the cached
        # module object, so it would expose the previous GA run's result.
        # Reload whenever the module is already loaded (including from an
        # earlier call of bootstrap) and import it only the very first time.
        loaded = sys.modules.get('best_fit')
        if loaded is None:
            import best_fit as loaded
        else:
            loaded = importlib.reload(loaded)
        return loaded.best_fit[-1][-2]

    def _ga_ranksvm_predict(un_type):
        # Optimise w with the Loss objective for one calc_UN mode, then
        # predict the test set with RankSVM(w=w).predict3.
        u, n = PerformanceMeasure(training_data_y).calc_UN(type=un_type)
        # 20 decision variables -- presumably the feature count; TODO confirm.
        pyGaft(objfunc=Loss,
               var_bounds=[(-1, 1)] * 20,
               individual_size=500,
               max_iter=200,
               max_or_min='min',
               P=P,
               r=r,
               u=u,
               n=n).run()
        return RankSVM(w=_latest_solution()).predict3(testing_data_X)

    def _regress_predict(model):
        # Fit a scikit-learn style regressor and predict the test set.
        fitted = model.fit(training_data_X, training_data_y)
        return fitted.predict(testing_data_X)

    # Pairwise transform shared by the three GA-optimised ranking SVMs.
    P, r = RankSVM().transform_pairwise(training_data_X, training_data_y)
    P = P.tolist()
    r = r.tolist()

    # Cost-sensitive ranking SVM.
    _record(_ga_ranksvm_predict('cs'), csrs_fpa_list, csrs_pofb_list)

    # IR SVM.
    _record(_ga_ranksvm_predict('ir'), irsvm_fpa_list, irsvm_pofb_list)

    # TODO (from the original): still need to add a variant that drops the
    # other parameter.
    _record(_ga_ranksvm_predict('svm'), svm_fpa_list, svm_pofb_list)

    # LTR: the optimiser is given plain Python lists instead of arrays.
    training_datalist_X = training_data_X.tolist()
    training_datalist_y = training_data_y.tolist()

    # Kept from the original; the name is not referenced in this function.
    from PyOptimize.General_Opt import Test_function

    pyGaft(objfunc=LTR,
           var_bounds=[(-20, 20)] * 20,
           individual_size=100,
           max_iter=200,
           max_or_min='max',
           X=training_datalist_X,
           y=training_datalist_y).run()
    a = _latest_solution()
    # Linear scoring: one dot product per test sample.
    ltr_pred_y = [np.dot(test_x, a) for test_x in testing_data_X]
    _record(ltr_pred_y, ltr_fpa_list, ltr_pofb_list)

    # Train Ranking SVM, DTR, LR and BRR on the original data set.

    # Shuffle so that r is not all ones; per the original note the SVM raises
    # an error when every pairwise label is 1.
    shuf_X, shuf_y = shuffle(training_data_X, training_data_y)
    rs = RankSVM(C=1.0).fit(shuf_X, shuf_y)
    _record(np.around(rs.predict2(testing_data_X)), rs_fpa_list,
            rs_pofb_list)

    _record(_regress_predict(DecisionTreeRegressor()), dtr_fpa_list,
            dtr_pofb_list)

    _record(_regress_predict(linear_model.LinearRegression()), lr_fpa_list,
            lr_pofb_list)

    _record(_regress_predict(BayesianRidge()), brr_fpa_list, brr_pofb_list)