コード例 #1
0
ファイル: main.py プロジェクト: qingyangtan/BAISE-DSG
from pipeline import Pipe
from model import xgboost_model, lightgbm_model
from sklearn.cross_validation import train_test_split
import xgboost as xgb
import lightgbm as lgb

if __name__ == '__main__':
    # pandas is needed by the cached-feature branch below, but the file's
    # import section never brings `pd` into scope; import it here so that
    # switching `firsttime` to False does not raise NameError.
    import pandas as pd

    # Raw input CSVs handed to the feature pipeline.
    train_path = 'data/train_test.csv'
    val_path = 'data/train_ensemble.csv'
    test_path = 'data/test_clean.csv'

    # True  -> build the feature tables with Pipe and cache them as CSV.
    # False -> reload the CSVs cached by a previous run.
    firsttime = True
    if firsttime:
        # NOTE(review): Pipe.make() is defined elsewhere; it is used here as
        # if it returns a pandas DataFrame (to_csv / columns) -- confirm.
        training_pipe = Pipe(train_path, val_path)
        train = training_pipe.make('pickle/train')
        train.to_csv('train_feature.csv', index=False)

        testing_pipe = Pipe(val_path, test_path)
        test = testing_pipe.make('pickle/test')
        test.to_csv('test_features.csv', index=False)
    else:
        train = pd.read_csv('train_feature.csv')
        test = pd.read_csv('test_features.csv')

    # Every column of the training table except the target is a feature.
    featlist = train.columns.tolist()
    featlist.remove('is_listened')

    # DataFrame.as_matrix() / Series.as_matrix() were removed in pandas 1.0;
    # `.values` yields the same NumPy array on both old and new releases.
    X = train[featlist].values
    y = train['is_listened'].values

    # Select the same feature columns, in the same order, from the test table.
    test_X = test[featlist].values
コード例 #2
0
if __name__ == '__main__':
    # pandas is needed by the cached-feature branch below; no import of `pd`
    # is visible in this snippet, so bring it into scope locally.
    import pandas as pd

    # Input files
    train_path = 'data/archive/train_clean.csv'
    test_path = 'data/archive/test_clean.csv'

    # Intermediate files (feature tables cached between runs)
    TRAIN_PATH_INTERMEDIATE = 'data/archive/train_intermediate.csv'
    TEST_PATH_INTERMEDIATE = 'data/archive/test_intermediate.csv'

    # Set boolean to use pre-made features or build on the fly
    firsttime = True
    if firsttime:
        # NOTE(review): Pipe.make() is defined elsewhere; it is used here as
        # if it returns a pandas DataFrame (to_csv / columns) -- confirm.
        training_pipe = Pipe(train_path, train_path)
        train = training_pipe.make('train_intermediate')
        train.to_csv(TRAIN_PATH_INTERMEDIATE, index=False)

        testing_pipe = Pipe(train_path, test_path)
        test = testing_pipe.make('test_intermediate')
        test.to_csv(TEST_PATH_INTERMEDIATE, index=False)
    else:
        # Reload exactly the files the branch above writes. The previous
        # hard-coded 'train_feature.csv' / 'test_features.csv' names did not
        # match the intermediate paths, so the cache was never read back.
        train = pd.read_csv(TRAIN_PATH_INTERMEDIATE)
        test = pd.read_csv(TEST_PATH_INTERMEDIATE)

    # Preparing train and test dataset for ensemble layer
    featlist = train.columns.tolist()
    featlist.remove('is_listened')

    # DataFrame.as_matrix() / Series.as_matrix() were removed in pandas 1.0;
    # `.values` yields the same NumPy array on both old and new releases.
    X = train[featlist].values
    y = train['is_listened'].values