예제 #1
0
파일: test.py 프로젝트: horoiwa/ensemble
def test():
    """Train, persist, reload and evaluate an ensemble on ``sample/boston.csv``.

    Builds a polynomial -> scaler -> GBR ensemble -> Ridge ensemble ->
    average pipeline, fits it on a 70/30 train/test split, saves it to
    ``sample/test.pkl``, reloads it with ``pickle`` and prints the
    prediction shapes and the test score.
    """
    df = pd.read_csv('sample/boston.csv')
    y = df['Price']
    # `axis` must be passed by keyword: positional use was deprecated in
    # pandas 1.3 and raises TypeError from pandas 2.0 onwards.
    X = df.drop(['Price'], axis=1)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = Model()
    model.add_layer(DataFrameInputs())
    model.add_layer(PolynomialLayer())
    model.add_layer(StandardScalerLayer())
    model.add_layer(EnsembleGBRLayer(n_models=10, row_ratio=0.5,
                                     col_ratio=0.5, scale=True))
    model.add_layer(EnsembleRidgeLayer(n_models=30, row_ratio=0.6,
                                       col_ratio=0.6, scale=True))
    model.add_layer(AverageLayer())

    model.train(X_train, y_train)
    model.summary()

    # Round-trip through disk; the pickle read here is the file this test
    # has just written, not external input.
    model.save('sample/test.pkl')
    with open('sample/test.pkl', 'rb') as f:
        model = pickle.load(f)

    X_pred = model.predict(X_test)
    print("Predict shape", X_pred.shape)

    # NOTE(review): predict_proba presumably returns an indexable pair whose
    # second element has a `.shape` -- confirm against Model.predict_proba.
    proba_out = model.predict_proba(X_test)
    print("Predict_proba shape", proba_out[1].shape)

    score = model.score(X_test, y_test)
    print(score)
예제 #2
0
파일: test.py 프로젝트: horoiwa/ensemble
def test_8():
    """Fit a ``ClusterDataset`` pipeline on ``sample/boston2.csv`` and plot it.

    Wraps a multi-head Ridge -> aggregate -> Ridge ensemble -> average model
    in ``ClusterDataset(n_poly=2, n_cluster=3)``, trains it on a 70/30
    split and shows a scatter plot of the held-out targets against the
    predictions.
    """
    # The banner previously said "test7" (copy-paste from test_7).
    print("test8")

    df = pd.read_csv('sample/boston2.csv')
    y = df['Price']
    # `axis` must be passed by keyword: positional use was deprecated in
    # pandas 1.3 and raises TypeError from pandas 2.0 onwards.
    X = df.drop(['Price'], axis=1)

    model = Model()
    model.add_layer(DataFrameInputs())
    model.add_layer(DuplicateLayer(n=1))
    model.add_layer(MultiheadRidgeLayer(n_models=300, row_ratio=1.,
                                        col_ratio=0.1, scale=True))
    model.add_layer(MultiheadAggregateLayer())
    model.add_layer(EnsembleRidgeLayer(n_models=100, row_ratio=1.,
                                       col_ratio=0.3, scale=True))
    model.add_layer(AverageLayer())

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    pipeline = ClusterDataset(model, n_poly=2, n_cluster=3)
    pipeline.train(X_train, y_train)

    # `proba` is unpacked but not used; only the point predictions are plotted.
    pred, proba = pipeline.predict_proba(X_test)
    plt.scatter(y_test.values, pred)
    plt.show()
예제 #3
0
파일: test.py 프로젝트: horoiwa/ensemble
def test_7():
    """Fit a ``TableDataset`` pipeline on ``sample/boston2.csv``.

    Trains a multi-head SVR + Ridge stacked model wrapped in
    ``TableDataset(n_poly=2, prescale=False)`` on the full data, prints the
    ``predict_proba`` output for the first row, then runs
    ``pipeline.valid`` on the full data.
    """
    print("test7")

    df = pd.read_csv('sample/boston2.csv')
    y = df['Price']
    # `axis` must be passed by keyword: positional use was deprecated in
    # pandas 1.3 and raises TypeError from pandas 2.0 onwards.
    X = df.drop(['Price'], axis=1)

    model = Model()
    model.add_layer(DataFrameInputs())
    model.add_layer(DuplicateLayer(n=1))
    model.add_layer(MultiheadSVRLayer(n_models=10, row_ratio=0.8,
                                      col_ratio=0.6, scale=True))
    model.add_layer(MultiheadRidgeLayer(n_models=3, row_ratio=0.8,
                                        col_ratio=0.8, scale=True))
    model.add_layer(MultiheadAggregateLayer())
    model.add_layer(EnsembleRidgeLayer(n_models=1, row_ratio=1.,
                                       col_ratio=1., scale=True))
    model.add_layer(AverageLayer())

    pipeline = TableDataset(model, n_poly=2, prescale=False)
    pipeline.train(X, y)

    # First row as a pandas Series, to exercise single-sample prediction.
    X_sample = X.iloc[0, :]
    pred, proba = pipeline.predict_proba(X_sample)
    print(pred, proba)
    pipeline.valid(X, y)
예제 #4
0
파일: test.py 프로젝트: horoiwa/ensemble
def test_5():
    """Basic-form example: validate a stacked model on ``sample/boston.csv``.

    Builds polynomial -> scaler -> duplicate -> multi-head GBR/Ridge ->
    aggregate -> Ridge ensemble -> average and calls ``model.valid`` with
    ``n=3, cv='KFold'`` on the full data set.
    """
    df = pd.read_csv('sample/boston.csv')
    y = df['Price']
    # `axis` must be passed by keyword: positional use was deprecated in
    # pandas 1.3 and raises TypeError from pandas 2.0 onwards.
    X = df.drop(['Price'], axis=1)

    # No explicit train/test split here: model.valid receives the full X, y
    # (the previous split result was never used).
    model = Model()
    model.add_layer(DataFrameInputs())
    model.add_layer(PolynomialLayer())
    model.add_layer(StandardScalerLayer())
    model.add_layer(DuplicateLayer(n=1))
    model.add_layer(MultiheadGBRLayer(n_models=50, row_ratio=0.8,
                                      col_ratio=0.8, scale=True))
    model.add_layer(MultiheadRidgeLayer(n_models=5, row_ratio=0.8,
                                        col_ratio=0.8, scale=True))
    model.add_layer(MultiheadAggregateLayer())
    model.add_layer(EnsembleRidgeLayer(n_models=1, row_ratio=1.,
                                       col_ratio=1., scale=True))
    model.add_layer(AverageLayer())

    model.valid(X, y, n=3, cv='KFold')
예제 #5
0
파일: test.py 프로젝트: horoiwa/ensemble
def test_4():
    """Train and score a multi-head stacked model on ``sample/boston.csv``.

    Fits polynomial -> scaler -> duplicate -> multi-head GBR/Ridge ->
    aggregate -> Ridge ensemble -> average on a 70/30 split, then prints
    the prediction shape, the model summary and the test score.
    """
    df = pd.read_csv('sample/boston.csv')
    y = df['Price']
    # `axis` must be passed by keyword: positional use was deprecated in
    # pandas 1.3 and raises TypeError from pandas 2.0 onwards.
    X = df.drop(['Price'], axis=1)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = Model()
    model.add_layer(DataFrameInputs())
    model.add_layer(PolynomialLayer())
    model.add_layer(StandardScalerLayer())

    model.add_layer(DuplicateLayer(n=1))
    model.add_layer(MultiheadGBRLayer(n_models=50, row_ratio=0.8,
                                      col_ratio=0.8, scale=True))
    model.add_layer(MultiheadRidgeLayer(n_models=10, row_ratio=0.8,
                                        col_ratio=0.8, scale=True))
    model.add_layer(MultiheadAggregateLayer())
    model.add_layer(EnsembleRidgeLayer(n_models=1, row_ratio=1.,
                                       col_ratio=1., scale=True))
    model.add_layer(AverageLayer())

    model.train(X_train, y_train)
    X_pred = model.predict(X_test)
    print("Predict shape", X_pred.shape)
    model.summary()
    score = model.score(X_test, y_test)
    print(score)
예제 #6
0
파일: test.py 프로젝트: horoiwa/ensemble
def test_10():
    """Validate a SMILES-input stacked model on ``sample/smi.csv``.

    Uses the ``smiles`` column as input and ``ESOL`` as target, builds
    SmilesInputs -> multi-head GBR -> aggregate -> Ridge ensemble ->
    average, and calls ``model.valid`` with ``n=3, cv='KFold'`` on the
    full data set.
    """
    df = pd.read_csv('sample/smi.csv')

    y = df['ESOL']
    X = df['smiles']

    # No explicit train/test split here: model.valid receives the full X, y
    # (the previous split result was never used).
    model = Model()
    model.add_layer(SmilesInputs())
    model.add_layer(MultiheadGBRLayer(n_models=3, row_ratio=0.8,
                                      col_ratio=0.8, scale=True))
    model.add_layer(MultiheadAggregateLayer())
    model.add_layer(EnsembleRidgeLayer(n_models=1, row_ratio=1.,
                                       col_ratio=1., scale=True))
    model.add_layer(AverageLayer())
    model.valid(X, y, n=3, cv='KFold')
예제 #7
0
파일: test.py 프로젝트: horoiwa/ensemble
def test_3():
    """Train and score a SMILES-input stacked model on ``sample/smi.csv``.

    The ``smiles`` column is the input and ``ESOL`` the target. The model
    is fitted on a 70/30 split; its summary and test score are printed.
    """
    frame = pd.read_csv('sample/smi.csv')

    target = frame['ESOL']
    smiles = frame['smiles']
    smiles_fit, smiles_eval, target_fit, target_eval = train_test_split(
        smiles, target, test_size=0.3)

    stacked = Model()
    stacked.add_layer(SmilesInputs())
    stacked.add_layer(
        MultiheadGBRLayer(n_models=50, row_ratio=0.6, col_ratio=0.6,
                          scale=True))
    stacked.add_layer(
        MultiheadRidgeLayer(n_models=30, row_ratio=0.9, col_ratio=0.9,
                            scale=True))
    stacked.add_layer(MultiheadAggregateLayer())
    stacked.add_layer(
        EnsembleRidgeLayer(n_models=1, row_ratio=1., col_ratio=1.,
                           scale=True))
    stacked.add_layer(AverageLayer())

    stacked.train(smiles_fit, target_fit)
    stacked.summary()
    print(stacked.score(smiles_eval, target_eval))