def test():
    """Train, save, reload and evaluate an ensemble model on sample/boston.csv.

    The 'Price' column is the target and every other column is a feature.
    The model is trained on a 70/30 split, pickled to 'sample/test.pkl',
    loaded back, and then used to print prediction shapes and the score on
    the held-out part.
    """
    df = pd.read_csv('sample/boston.csv')
    y = df['Price']
    # Use the keyword form: passing ``axis`` positionally was deprecated in
    # pandas 1.3 and raises TypeError from pandas 2.0 onwards.
    X = df.drop(columns=['Price'])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = Model()
    model.add_layer(DataFrameInputs())
    model.add_layer(PolynomialLayer())
    model.add_layer(StandardScalerLayer())
    model.add_layer(EnsembleGBRLayer(n_models=10, row_ratio=0.5, col_ratio=0.5, scale=True))
    model.add_layer(EnsembleRidgeLayer(n_models=30, row_ratio=0.6, col_ratio=0.6, scale=True))
    model.add_layer(AverageLayer())

    model.train(X_train, y_train)
    model.summary()

    # Round-trip through pickle so the checks below run on the reloaded
    # model. The file is the one written just above, not external input.
    model.save('sample/test.pkl')
    with open('sample/test.pkl', 'rb') as f:
        model = pickle.load(f)

    X_pred = model.predict(X_test)
    print("Predict shape", X_pred.shape)

    # predict_proba returns an indexable result; element 1 is inspected here
    # (presumably the uncertainty part -- confirm against Model.predict_proba).
    X_pred = model.predict_proba(X_test)
    print("Predict_proba shape", X_pred[1].shape)

    score = model.score(X_test, y_test)
    print(score)
def test_8():
    """Fit a ClusterDataset pipeline on sample/boston2.csv and plot predictions.

    The 'Price' column is the target. A multihead/ensemble ridge model is
    wrapped in ``ClusterDataset(n_poly=2, n_cluster=3)``, trained on a 70/30
    split, and the held-out predictions are scattered against the true
    values with matplotlib.
    """
    # The label previously read "test7" (copied from test_7), which made the
    # console output of the two routines indistinguishable.
    print("test8")
    df = pd.read_csv('sample/boston2.csv')
    y = df['Price']
    # Keyword form: positional ``axis`` is rejected by pandas >= 2.0.
    X = df.drop(columns=['Price'])

    model = Model()
    model.add_layer(DataFrameInputs())
    model.add_layer(DuplicateLayer(n=1))
    model.add_layer(MultiheadRidgeLayer(n_models=300, row_ratio=1., col_ratio=0.1, scale=True))
    model.add_layer(MultiheadAggregateLayer())
    model.add_layer(EnsembleRidgeLayer(n_models=100, row_ratio=1., col_ratio=0.3, scale=True))
    model.add_layer(AverageLayer())

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    pipeline = ClusterDataset(model, n_poly=2, n_cluster=3)
    pipeline.train(X_train, y_train)

    # Only the point predictions are plotted; ``proba`` is unpacked because
    # predict_proba returns a pair.
    pred, proba = pipeline.predict_proba(X_test)
    plt.scatter(y_test.values, pred)
    plt.show()
def test_7():
    """Fit a TableDataset pipeline on sample/boston2.csv and validate it.

    The 'Price' column is the target. A multihead SVR/ridge model is wrapped
    in ``TableDataset(n_poly=2, prescale=False)``, trained on the full data,
    queried for the first row, and finally passed to ``pipeline.valid``.
    """
    print("test7")
    df = pd.read_csv('sample/boston2.csv')
    y = df['Price']
    # Keyword form: positional ``axis`` is rejected by pandas >= 2.0.
    X = df.drop(columns=['Price'])

    model = Model()
    model.add_layer(DataFrameInputs())
    model.add_layer(DuplicateLayer(n=1))
    model.add_layer(MultiheadSVRLayer(n_models=10, row_ratio=0.8, col_ratio=0.6, scale=True))
    model.add_layer(MultiheadRidgeLayer(n_models=3, row_ratio=0.8, col_ratio=0.8, scale=True))
    model.add_layer(MultiheadAggregateLayer())
    model.add_layer(EnsembleRidgeLayer(n_models=1, row_ratio=1., col_ratio=1., scale=True))
    model.add_layer(AverageLayer())

    pipeline = TableDataset(model, n_poly=2, prescale=False)
    pipeline.train(X, y)

    # Single-sample prediction: the first row is passed as a Series.
    X_sample = X.iloc[0, :]
    pred, proba = pipeline.predict_proba(X_sample)
    print(pred, proba)

    pipeline.valid(X, y)
def test_5():
    """Basic usage pattern: build a stacked model and cross-validate it.

    Loads sample/boston.csv with 'Price' as the target, stacks polynomial
    and scaling preprocessing with multihead GBR/ridge layers, and calls
    ``model.valid`` on the full data with ``n=3`` and ``cv='KFold'``.
    """
    df = pd.read_csv('sample/boston.csv')
    y = df['Price']
    # Keyword form: positional ``axis`` is rejected by pandas >= 2.0.
    X = df.drop(columns=['Price'])

    # No explicit train/test split here: validation runs on the full X, y,
    # so the previously computed (and unused) split has been removed.
    model = Model()
    model.add_layer(DataFrameInputs())
    model.add_layer(PolynomialLayer())
    model.add_layer(StandardScalerLayer())
    model.add_layer(DuplicateLayer(n=1))
    model.add_layer(MultiheadGBRLayer(n_models=50, row_ratio=0.8, col_ratio=0.8, scale=True))
    model.add_layer(MultiheadRidgeLayer(n_models=5, row_ratio=0.8, col_ratio=0.8, scale=True))
    model.add_layer(MultiheadAggregateLayer())
    model.add_layer(EnsembleRidgeLayer(n_models=1, row_ratio=1., col_ratio=1., scale=True))
    model.add_layer(AverageLayer())

    model.valid(X, y, n=3, cv='KFold')
def test_4():
    """Train a stacked multihead model on sample/boston.csv and score it.

    The 'Price' column is the target. The model is trained on a 70/30 split;
    the prediction shape, the model summary and the held-out score are
    printed.
    """
    df = pd.read_csv('sample/boston.csv')
    y = df['Price']
    # Keyword form: positional ``axis`` is rejected by pandas >= 2.0.
    X = df.drop(columns=['Price'])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = Model()
    model.add_layer(DataFrameInputs())
    model.add_layer(PolynomialLayer())
    model.add_layer(StandardScalerLayer())
    model.add_layer(DuplicateLayer(n=1))
    model.add_layer(MultiheadGBRLayer(n_models=50, row_ratio=0.8, col_ratio=0.8, scale=True))
    model.add_layer(MultiheadRidgeLayer(n_models=10, row_ratio=0.8, col_ratio=0.8, scale=True))
    model.add_layer(MultiheadAggregateLayer())
    model.add_layer(EnsembleRidgeLayer(n_models=1, row_ratio=1., col_ratio=1., scale=True))
    model.add_layer(AverageLayer())

    model.train(X_train, y_train)

    X_pred = model.predict(X_test)
    print("Predict shape", X_pred.shape)

    model.summary()

    score = model.score(X_test, y_test)
    print(score)
def test_10():
    """Cross-validate a small SMILES-input model on sample/smi.csv.

    The 'smiles' column is the input and 'ESOL' the target. The model is
    checked with ``model.valid`` on the full data using ``n=3`` and
    ``cv='KFold'``.
    """
    df = pd.read_csv('sample/smi.csv')
    y = df['ESOL']
    X = df['smiles']

    # Validation runs on the full X, y, so the train/test split that was
    # computed here but never read has been removed.
    model = Model()
    model.add_layer(SmilesInputs())
    model.add_layer(MultiheadGBRLayer(n_models=3, row_ratio=0.8, col_ratio=0.8, scale=True))
    model.add_layer(MultiheadAggregateLayer())
    model.add_layer(EnsembleRidgeLayer(n_models=1, row_ratio=1., col_ratio=1., scale=True))
    model.add_layer(AverageLayer())

    model.valid(X, y, n=3, cv='KFold')
def test_3():
    """Train a SMILES-input stacked model on sample/smi.csv and print its score.

    The 'smiles' column is the input and 'ESOL' the target. The model is
    trained on a 70/30 split, summarised, and scored on the held-out part.
    """
    frame = pd.read_csv('sample/smi.csv')
    targets = frame['ESOL']
    smiles = frame['smiles']
    smiles_fit, smiles_eval, targets_fit, targets_eval = train_test_split(
        smiles, targets, test_size=0.3
    )

    model = Model()
    # Layers are attached in pipeline order, input first.
    pipeline_layers = (
        SmilesInputs(),
        MultiheadGBRLayer(n_models=50, row_ratio=0.6, col_ratio=0.6, scale=True),
        MultiheadRidgeLayer(n_models=30, row_ratio=0.9, col_ratio=0.9, scale=True),
        MultiheadAggregateLayer(),
        EnsembleRidgeLayer(n_models=1, row_ratio=1., col_ratio=1., scale=True),
        AverageLayer(),
    )
    for layer in pipeline_layers:
        model.add_layer(layer)

    model.train(smiles_fit, targets_fit)
    model.summary()

    print(model.score(smiles_eval, targets_eval))