def main(): # read data for train train = pd.read_csv('data_for_model/new_with_price_per_sqm/training_data.csv') LB = LabelBinarizer() train['town'] = LB.fit_transform(train['town']) train['flat_model'] = LB.fit_transform(train['flat_model']) labels = train.iloc[:,20:].values total_price = train.iloc[:,19:20].values features = train.iloc[:,:19].values floor_area = np.asarray(train['floor_area_sqm'].values).reshape(len(labels),1) # preprocess training data X_train = preprocessing_X(features) scaler_y_train, y_train = preprocessing_Y(labels) # read in test data test = pd.read_csv('data_for_model/new_with_price_per_sqm/test_data.csv') test['town'] = LB.fit_transform(test['town']) test['flat_model'] = LB.fit_transform(test['flat_model']) labels_test = test.iloc[:,20:].values total_price_test = test.iloc[:,19:20].values features_test = test.iloc[:,:19].values floor_area_test = np.asarray(test['floor_area_sqm'].values).reshape(len(labels_test),1) # preprocess test data X_test = preprocessing_X(features_test) scaler_y_test, y_test = preprocessing_Y(labels_test) # fine_tune # fine_tune(X_train, y_train, scaler_y, floor_area, total_price) # train on all training data with best hyper-params #model = build_model() #result = model.fit(X_train, y_train, epochs=300, batch_size = int(len(X_train)/256), verbose=1, shuffle=False) # train on all training data with best hyper-params with pdp model = KerasRegressor(build_model, epochs=300, batch_size = int(len(X_train)/256), verbose=1, shuffle=False) model._estimator_type = "regressor" # Cheap workaround for keras NN to work with plot_partial_dependence model.dummy_ = "dummy" # Cheap workaround for keras NN to work with plot_partial_dependence model.fit(X_train, y_train) print('Computing partial dependence plots...') tic = time() pdp_features = [10] # remaining_lease, dist_nearest_mrt, dist_nearest_supermarkets, dist_nearest_sports_facilities display = plot_partial_dependence(estimator=model, X=X_train, features=pdp_features, kind='both', subsample=500, random_state=0, verbose=10) print(f"done in {time() - tic:.3f}s") display.figure_.suptitle( 'Plot' ) display.figure_.subplots_adjust(hspace=0.3) plt.show() # get score for validation #score = get_score(scaler_y_train.inverse_transform(model.predict(X_train)) * floor_area, total_price) #print('score on validation = {}'.format(score)) # predict y values for test data #val_res = scaler_y_test.inverse_transform(model.predict(X_test)) # get performance score on test data #score = get_score(val_res * floor_area_test, total_price_test) # score = get_score(val_res, total_price_test) #print('score on test = {}'.format(score)) '''
model.add(Dense(1, activation='linear')) model.compile(optimizer='adam', loss='mean_squared_error') return model def build_model3(): model = Sequential() model.add(Dense(32, activation='relu', input_shape=(13, ))) model.add(Dense(16, activation='relu')) model.add(Dense(1, activation='linear')) model.compile(optimizer='adam', loss='mean_squared_error') return model if __name__ == '__main__': (x_train, y_train), (x_test, y_test) = load_data() model1 = KerasRegressor(build_fn=build_model1, epochs=100, batch_size=64) model1._estimator_type = "regressor" model2 = KerasRegressor(build_fn=build_model2, epochs=100, batch_size=64) model2._estimator_type = "regressor" model3 = KerasRegressor(build_fn=build_model3, epochs=100, batch_size=64) model3._estimator_type = "regressor" cls = VotingRegressor(estimators=[('model1', model1), ('model2', model2), ('model3', model3)]) cls.fit(x_train, y_train) joblib.dump(cls, "sklearn-regressor.h5") print("score: ", cls.score(x_test, y_test))