コード例 #1
0
def load_data_from_raw(raw_data):
    """
    Load a raw dataset file and parse it with ``read_dataset``.

    :param raw_data: path of the raw data file; it is opened read-only as
        UTF-8 text.
    :return: the four values unpacked from ``read_dataset``, in the order it
        yields them (named features, labels, query ids and comments by the
        callers in this file).
    """
    with open(raw_data, mode='r', encoding='utf-8') as raw_file:
        features, labels, query_ids, remarks = read_dataset(raw_file)

    return features, labels, query_ids, remarks
コード例 #2
0
ファイル: lgb_ltr.py プロジェクト: yzu2ustc/learning_to_rank
def test_data_ndcg(model_path, test_path):
    """
    Evaluate a saved model on a test file and print the average NDCG.

    :param model_path: path of the model file handed to ``lgb.Booster``.
    :param test_path: path of the test data file; opened as UTF-8 text and
        parsed with ``read_dataset``.
    :return: None; the result is printed to stdout.
    """
    with open(test_path, mode='r', encoding='utf-8') as test_file:
        features, labels, query_ids, _remarks = read_dataset(test_file)

    booster = lgb.Booster(model_file=model_path)
    scores = booster.predict(features)

    # validate() hands back a pair; only its first item -- the NDCG averaged
    # over all query ids -- is reported. The literal 60 is forwarded as the
    # fourth argument (presumably the NDCG cutoff; confirm in validate()).
    mean_ndcg, _ = validate(query_ids, labels, scores, 60)
    print("all qid average ndcg: ", mean_ndcg)
    print("job done!")
コード例 #3
0
ファイル: lgb_ltr.py プロジェクト: yzu2ustc/learning_to_rank
        train_start = datetime.now()
        x_train, y_train, q_train = load_data(data_feats, data_group)
        train(x_train, y_train, q_train, model_path)
        train_end = datetime.now()
        consume_time = (train_end - train_start).seconds
        print("consume time : {}".format(consume_time))

    elif sys.argv[1] == '-predict':
        train_start = datetime.now()
        predict_data_path = base_path + '/data/test/test.txt'  #格式如ranklib中的数据格式
        test_X, test_y, test_qids, comments = load_data_from_raw(raw_data_path)
        t_results = predict(test_X, comments, model_path)
        print(t_results)
        train_end = datetime.now()
        consume_time = (train_end - train_start).seconds
        print("consume time : {}".format(consume_time))

    elif sys.argv[1] == '-ndcg':
        # Report NDCG for the test set by delegating to test_data_ndcg.
        test_path = base_path + '/data/test/test.txt'  # test data whose average NDCG is evaluated
        test_data_ndcg(model_path, test_path)

    elif sys.argv[1] == '-feature':
        # Hand the model path to plot_print_feature_importance (defined
        # elsewhere; presumably plots and prints feature importances --
        # confirm in that helper).
        plot_print_feature_importance(model_path)

    elif sys.argv[1] == '-leaf':
        #利用模型得到样本叶结点的one-hot表示
        raw_data = base_path + '/data/test/leaf.txt'
        with open(raw_data, 'r', encoding='utf-8') as testfile:
            test_X, test_y, test_qids, comments = read_dataset(testfile)
        get_leaf_index(test_X, model_path)