コード例 #1
0
def save(DATASET, NUM_FOLD, ENSEMBLE, ALGORITHM, sparse=False):
    """Fit the baseline model on one training fold and save per-query results.

    Loads ``./dataset/<DATASET>/Fold<NUM_FOLD>/Norm.train.txt`` with every
    feature enabled, fits the model returned by
    ``l2rCodesSerial.getTheModel`` on it, predicts on that same training
    data, and writes one line per entry of the ``queries`` value returned by
    ``l2rCodesSerial.getEvaluation`` to
    ``./baselines/<DATASET>/Fold<NUM_FOLD>/<ALGORITHM>train.txt``.

    Args:
        DATASET: Dataset name; it selects the number of features and is part
            of both the input and the output path.
        NUM_FOLD: Fold identifier used in the paths (converted with ``str``).
        ENSEMBLE: Accepted for interface compatibility; not used here, the
            model is always requested with ensemble code ``1``.
            NOTE(review): confirm that ignoring this argument is intended.
        ALGORITHM: Prefix of the output file name.
        sparse: Forwarded unchanged to ``l2rCodesSerial.load_L2R_file``.

    Raises:
        ValueError: If ``DATASET`` is not one of the known dataset names.
    """
    SEED = 1313
    NTREES = 300
    METRIC = 'NDCG'

    # Number of features ("genes") per supported dataset.
    genes_by_dataset = {
        '2003_td_dataset': 64,
        'web10k': 136,
        'yahoo': 700,
        'movielens': 13,
        'lastfm': 13,
        'bibsonomy': 13,
        'youtube': 13,
    }
    if DATASET not in genes_by_dataset:
        # Fail early: continuing without a feature count would only crash
        # later with an unrelated TypeError while building the feature mask.
        raise ValueError('DATASET INVÁLIDO: {!r}'.format(DATASET))
    NUM_GENES = genes_by_dataset[DATASET]

    fold = str(NUM_FOLD)
    train_path = ('./dataset/' + DATASET + '/Fold' + fold + '/Norm.' +
                  'train' + '.txt')
    # A mask of all '1' characters selects every feature.
    X_train, y_train, query_id_train = l2rCodesSerial.load_L2R_file(
        train_path, '1' * NUM_GENES, sparse)

    model = l2rCodesSerial.getTheModel(1, NTREES, 0.3, SEED, DATASET)
    model.fit(X_train, y_train)
    # The model is scored on the same data it was fitted on.
    scoreTest = list(model.predict(X_train))

    _, queries = l2rCodesSerial.getEvaluation(scoreTest, query_id_train,
                                              y_train, DATASET, METRIC,
                                              "test")

    out_path = ('./baselines/' + DATASET + '/Fold' + fold + '/' + ALGORITHM +
                'train.txt')
    # The context manager closes the file even if a write raises.
    with open(out_path, "w+") as f:
        f.writelines(str(query) + '\n' for query in queries)
def getPrecisionAndQueries(individuo, NUM_GENES, X_train, y_train, X_test,
                           y_test, query_id_train, ENSEMBLE, NTREES, SEED,
                           DATASET, METRIC):
    """Evaluate a feature mask by fitting and scoring on the training set.

    The first ``NUM_GENES`` entries of ``individuo`` are read as a mask: a
    column of ``X_train`` is kept when its entry equals ``'1'``. A model from
    ``l2rCodesSerial.getTheModel`` is fitted on the kept columns and then
    predicts on those same rows; the predictions are passed to
    ``l2rCodesSerial.getEvaluation``.

    An earlier variant of this function (previously kept here as
    commented-out code) produced the scores with a 3-fold split over the
    queries instead of fitting and predicting on the full training set.

    Args:
        individuo: Iterable of mask characters, at least ``NUM_GENES`` long.
        NUM_GENES: Number of mask positions to inspect.
        X_train: Training features; must support ``X[:, list_of_columns]``.
        y_train: Training labels.
        X_test: Not used by this function.
        y_test: Not used by this function.
        query_id_train: Query ids, forwarded to the evaluation helper.
        ENSEMBLE, NTREES, SEED, DATASET: Forwarded to ``getTheModel``.
        METRIC: Forwarded to ``getEvaluation``.

    Returns:
        A tuple ``(ndcg, queries, scoreTrain)``: the two values returned by
        ``getEvaluation`` followed by the list of predicted scores.
    """
    mask = list(individuo)
    selected = [gene for gene in range(NUM_GENES) if mask[gene] == '1']
    X_selected = X_train[:, selected]

    # Pre-sized to the number of labels; filled from the predictions below.
    scoreTrain = [0] * len(y_train)

    regressor = l2rCodesSerial.getTheModel(ENSEMBLE, NTREES, 0.3, SEED,
                                           DATASET)
    regressor.fit(X_selected, y_train)
    predictions = regressor.predict(X_selected)

    for position, value in enumerate(predictions):
        scoreTrain[position] = value

    metric_value, per_query = l2rCodesSerial.getEvaluation(
        scoreTrain, query_id_train, y_train, DATASET, METRIC, "test")
    return metric_value, per_query, scoreTrain
        # NSGA best IND RF
        ENSEMBLE = 1  # random forest
        NTREES = 20
        SEED = 1887
        NUM_FOLD = '0'
        METRIC = "NDCG"
        sparse = True
        ALGORITHM = 'rf'
        print('reading and training NSGA bestind')
        X_train, y_train, query_id_train = l2rCodesSerial.load_L2R_file(
            './dataset/' + dataset + '/' + NUM_FOLD + '.' + 'train', bestindNSGA, sparse)
        X_test, y_test, query_id_test = l2rCodesSerial.load_L2R_file(
            './dataset/' + dataset + '/' + NUM_FOLD + '.' + 'test', bestindNSGA, sparse)
        # print(len(X_train[0]))

        model = l2rCodesSerial.getTheModel(ENSEMBLE, NTREES, 0.3, SEED, dataset)
        model.fit(X_train, y_train)
        resScore = model.predict(X_test)

        scoreTest = [0] * len(y_test)
        c = 0
        for i in resScore:
            scoreTest[c] = i
            c = c + 1

        ndcg, queries = l2rCodesSerial.getEvaluation(scoreTest, query_id_test, y_test, dataset, METRIC, "test")
        ndcgs.append(queries)
        diversitys.append(evaluateIndividuoSerial.getDiversity(scoreTest, y_test, query_id_test))
        noveltys.append(evaluateIndividuoSerial.getNovelty(scoreTest, y_test, query_id_test))

        # SPEA best IND RF