# print("Conditions: " , comb) print("Conditions shape: ", comb.shape) # print("Coefficients: " , models_b) print("Coefficients shape: ", models_b.shape) hyper_model = MultiOutputRegressor(make_pipeline(PolynomialFeatures(hyper_degree), linear_model.LinearRegression(fit_intercept=True, normalize=True))) ################################################################## # Predict hyper_model.fit(comb, models_b) pred = hyper_model.predict(comb) score = hyper_model.score(comb, models_b) print("Score: ", score) # Hyper-Model implementation else: coeffs = [] error_models = [] for i in range(comb.shape[0]): print('Target: ', comb[i, 0], ',', comb[i, 1]) y = beta.pdf(x, comb[i, 0], comb[i, 1])
                                                          random_state=0))
regr_multirf.fit(X_train, y_train)

regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2)
regr_rf.fit(X_train, y_train)

# Predict on new data
y_multirf = regr_multirf.predict(X_test)
y_rf = regr_rf.predict(X_test)

# Plot the results
plt.figure()
s = 50
a = 0.4
plt.scatter(y_test[:, 0], y_test[:, 1], edgecolor='k',
            c="navy", s=s, marker="s", alpha=a, label="Data")
plt.scatter(y_multirf[:, 0], y_multirf[:, 1], edgecolor='k',
            c="cornflowerblue", s=s, alpha=a,
            label="Multi RF score=%.2f" % regr_multirf.score(X_test, y_test))
plt.scatter(y_rf[:, 0], y_rf[:, 1], edgecolor='k',
            c="c", s=s, marker="^", alpha=a,
            label="RF score=%.2f" % regr_rf.score(X_test, y_test))
plt.xlim([-6, 6])
plt.ylim([-6, 6])
plt.xlabel("target 1")
plt.ylabel("target 2")
plt.title("Comparing random forests and the multi-output meta estimator")
plt.legend()
# plt.show()
pltshow(plt)
y = y_data.values
x_pred = test.values

x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8,
                                                    random_state=33)

# model = DecisionTreeRegressor(max_depth=4)  # discriminates poorly once max_depth rises past a point
# model = RandomForestRegressor(n_estimators=200, max_depth=3)
# model = GradientBoostingRegressor()
# model = XGBRegressor()
model = MultiOutputRegressor(GradientBoostingRegressor())

model.fit(x_train, y_train)
score = model.score(x_test, y_test)
print(score)

y4 = model.predict(test.values)

# The function and the for loop here exist because GradientBoosting and XGBoost
# only accept a single (scalar) target at a time. To run those two models, the
# current dataset is sliced into 4 pieces (one per target column) so each target
# becomes scalar; the for loop performs that slicing (a hedged sketch of this
# per-column pattern follows this snippet). RandomForest and DecisionTree, by
# contrast, run fine on the multi-column targets as-is.
# y_predict = tree_fit(y_train, y_test)

print(y4.shape)
'''
# submission
a = np.arange(10000, 20000)
submission = pd.DataFrame(y4, a)
submission.to_csv('D:/Study/Bitcamp/Dacon/comp1/sub_GB.csv', index=True,
                  header=['hhb', 'hbo2', 'ca', 'na'], index_label='id')
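# --- Illustrative sketch (not from the original snippet): the per-column
# workaround described in the comment above. Training one single-output
# GradientBoostingRegressor per target column is exactly what
# MultiOutputRegressor automates; done by hand it could look like the
# hypothetical helper below. `fit_per_column` and its arguments are
# assumptions for illustration only.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

def fit_per_column(x_train, y_train, x_test):
    """Fit one single-target GBM per column of y_train and stack predictions."""
    preds = []
    for col in range(y_train.shape[1]):       # one model per target column
        gbm = GradientBoostingRegressor()
        gbm.fit(x_train, y_train[:, col])     # scalar target for this column
        preds.append(gbm.predict(x_test))
    return np.column_stack(preds)             # shape: (n_samples, n_targets)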
days = observe_days + predict_days
for i in range(num_times - days + 1):
    x_data.append(np.concatenate([infected[i:observe_days + i],
                                  deaths[i:observe_days + i]]))
    y_data.append(np.concatenate([infected[observe_days + i:days + i],
                                  deaths[observe_days + i:days + i]]))

# %% split the dataset into train and test
x_data = np.array(x_data)
y_data = np.array(y_data)
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data,
                                                    test_size=0.25,
                                                    random_state=87)

# %% fit a multi-output XGBoost regressor
from sklearn.multioutput import MultiOutputRegressor
multioutputregressor = MultiOutputRegressor(xgb.XGBRegressor()).fit(x_train, y_train)

# %%
multioutputregressor.score(x_train, y_train)

# %%
multioutputregressor.score(x_test, y_test)

# %%
((multioutputregressor.predict(x_test) - y_test) ** 2).sum()

# %%
test_target = 7777
# multioutputregressor.predict(x_test)[test_target]

# %%
import matplotlib.pyplot as plt
plt.figure()
plt.title('infected')
plt.plot(np.arange(observe_days), x_test[test_target][0:observe_days],
         label='observe')
plt.plot(np.arange(predict_days) + observe_days,
         y_test[test_target][0:predict_days], '-o', label='true')
plt.plot(np.arange(predict_days) + observe_days,
         multioutputregressor.predict(x_test)[test_target][0:predict_days],
         '-o', label='predict')
plt.legend()
y_test[:, 1], edgecolor="k", c="navy", s=s, marker="s", alpha=a, label="Data", ) plt.scatter( y_multirf[:, 0], y_multirf[:, 1], edgecolor="k", c="cornflowerblue", s=s, alpha=a, label="Multi RF score=%.2f" % regr_multirf.score(X_test, y_test), ) plt.scatter( y_rf[:, 0], y_rf[:, 1], edgecolor="k", c="c", s=s, marker="^", alpha=a, label="RF score=%.2f" % regr_rf.score(X_test, y_test), ) plt.xlim([-6, 6]) plt.ylim([-6, 6]) plt.xlabel("target 1") plt.ylabel("target 2")
# run with Python 3
import numpy as np

training_data = np.loadtxt("/home/pi/git-repos/hello-world/training_data")
x = training_data[:, 0:3]
y = training_data[:, 3:5]

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor

x_train, x_test, y_train, y_test = train_test_split(x, y)
print("x_train.shape", x_train.shape)
print("y_train.shape", y_train.shape)

reg = MultiOutputRegressor(GradientBoostingRegressor()).fit(x_train, y_train)
print("reg.score(x_train, y_train)", reg.score(x_train, y_train))
print("reg.score(x_test, y_test)", reg.score(x_test, y_test))
output = reg.predict(x_test)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)  # transform only; refitting the scaler on test data would leak

# Build the model
# model = XGBRegressor(max_depth=4)
model = MultiOutputRegressor(
    XGBRegressor(random_state=12, n_estimators=500, max_depth=30))

# Train
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
score = model.score(x_test, y_test)  # score() expects (X, y), not (y_true, y_pred)
print("score : ", score)

# Evaluate and predict
# loss, mse = model.evaluate(x_test, y_test, batch_size=1)
x_predict = np.array([[795, 1550, 1746, 1690]])
x_predict = scaler.transform(x_predict)
y_real = np.array([777, 1559, 1762, 1659])
y_predict = model.predict(x_predict)
# print("loss : ", loss)
# print("mse : ", mse)
def main():
    ## FIRST
    print("######### MULTI-OUTPUT REGRESSION #########")
    # Read data
    X = np.loadtxt('fileEnd_X.pos')
    y = np.loadtxt("fileEnd_Y.pof")
    # Split into training set and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    # Train the model
    clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X_train, y_train)
    # Predict on the test set
    Y_pred = clf.predict(X_test)
    print(clf.score(X_train, y_train))

    # Build the predicted dataset
    z = Y_pred[:, 0]
    x = Y_pred[:, 1]
    y = Y_pred[:, 2]
    # Build the test dataset
    z1 = y_test[:, 0]
    x1 = y_test[:, 1]
    y1 = y_test[:, 2]

    # Create the figure
    fig = plt.figure(figsize=(10, 7))
    ax = plt.axes(projection="3d")
    # Plot
    ax.scatter3D(x, y, z, color="blue", alpha=0.1)
    ax.scatter3D(x1, y1, z1, color="red")
    plt.title("Results: predicted Y vs. test set")
    # Show plot
    plt.show()

    ## SECOND
    print("######### POLYNOMIAL REGRESSION #########")
    # Prepare the data
    data = pd.read_csv(filename, sep="\t", header=0)
    # Understanding the data
    # print('Data set information')
    # print(data.shape)
    # print(data.head(78))
    # print(data.columns)

    #### PREPARE DATA FOR POLYNOMIAL REGRESSION ###
    # Define inputs X: only column 6, the 'age' column
    X_p = data['age']
    # Define Y
    y_p = data['length']
    # Define the algorithm to use
    pr = linear_model.LinearRegression()
    # Define the polynomial degree
    print("Enter the value for degree")
    input_degree = input()
    deg = int(input_degree)
    poli_reg = PolynomialFeatures(degree=deg)

    precision = 0
    data = data.values
    k_iterations = 100
    n_size = len(data)
    print("n_size", n_size)
    # Bootstrapping begins
    for i in range(k_iterations):
        train = resample(data, n_samples=n_size)
        test = np.array([x for x in data if x.tolist() not in train.tolist()])
        X_train = train[:, 0].reshape(-1, 1)  # 0 is age, 1 is length
        y_train = train[:, 1].reshape(-1, 1)
        X_test = test[:, 0].reshape(-1, 1)
        y_test = test[:, 1].reshape(-1, 1)
        X_train_poli = poli_reg.fit_transform(X_train)
        X_test_poli = poli_reg.fit_transform(X_test)
        # Train
        pr.fit(X_train_poli, y_train)
        # Accuracy
        Y_pred_pr = pr.predict(X_test_poli)
        # print("Actual values")
        # print(y_test)
        # print("Predicted values")
        # print(Y_pred_pr)
        # Accumulate the score on each bootstrap iteration (note: scored on the training fold)
        precision += pr.score(X_train_poli, y_train)

    plt.scatter(X_p, y_p)  # full data set
    plt.scatter(X_test, Y_pred_pr, color="red", linewidth=3)
    plt.show()
    print("Precision")
    print(precision / k_iterations)
    mse = mean_squared_error(y_test, Y_pred_pr)
    print("MSE ", mse)
SEQUENCE = np.load('SequenceArray.npz')['SequenceArray']  # load the array
print(SEQUENCE[0])
print('Data loaded')
print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))

NUMBER = len(SEQUENCE)
SEQUENCE2 = []
for i in range(int(NUMBER / 2)):
    SEQUENCE2.append(SEQUENCE[2 * i])

# Step = 3
SEQUENCE3 = []
for i in range(int(NUMBER / 3)):
    SEQUENCE3.append(SEQUENCE[3 * i])

# def GetSequence()
BASIC_SEQUENCE = np.zeros((NUMBER - Frames, Weight * Height))
NEXT_SEQUENCE = np.zeros((NUMBER - Frames, Frames * Weight))
for i in range(Frames):
    print(i)
    BASIC_SEQUENCE[:, i] = SEQUENCE[i:i + NUMBER - Frames]
    NEXT_SEQUENCE[:, i] = SEQUENCE[i + 1:i + NUMBER - Frames + 1]

train_X, test_X, train_y, test_y = train_test_split(BASIC_SEQUENCE[:10],
                                                    NEXT_SEQUENCE[:10],
                                                    test_size=0.2,
                                                    random_state=4)

# LinearSVR has no 'mean_square_error' loss; the valid squared-loss option is
# 'squared_epsilon_insensitive'.
model = MultiOutputRegressor(LinearSVR(loss='squared_epsilon_insensitive', C=1.0))
model.fit(train_X, train_y)
score = model.score(test_X, test_y)

# mean_squared_error expects (y_true, y_pred), not raw feature matrices
train_loss = mean_squared_error(train_y, model.predict(train_X))
val_loss = mean_squared_error(test_y, model.predict(test_X))
print("Score", score)
print("train loss: %.4f - val_loss: %.4f" % (train_loss, val_loss))
def run_sklearn_poly(filename):
    dataframe = pandas.read_csv(filename, header=None)
    dataset = dataframe.values
    X = dataset[:, 0].astype(float)
    # Y = dataset[:, 1:].astype(float)
    # Y = dataset[:, 257:].astype(float)  # predict just one curve
    Y = dataset[:, 1:].astype(float)  # predict just one curve
    m = dataframe.shape[0]  # ROWS or test samples
    X_test = X[m - 1]
    Y_test = Y[m - 1]

    # preprocess
    # min_max_scaler = preprocessing.MinMaxScaler()
    # min_max_scaler.fit(X)
    # X = min_max_scaler.transform(X)
    # min_max_scaler = preprocessing.MinMaxScaler()
    # min_max_scaler.fit(Y)
    # Y = min_max_scaler.transform(Y)
    # print(Y)
    # Y = Y.reshape(-1, 1)
    # 1000: 56
    # 2000: 60
    X = X.reshape(-1, 1)  # scikit-learn requires a 2-D feature matrix
    # X = np.sort(5 * np.random.rand(40, 1), axis=0)
    # Y = np.sin(X).ravel()
    # print(X)
    # print(Y)
    # model = make_pipeline(PolynomialFeatures(500), Ridge())
    # svr_rbf = SVR(kernel='rbf')
    svr_multi = MultiOutputRegressor(SVR(kernel='rbf', C=1e6), n_jobs=-1)
    svr_multi.fit(X, Y)
    y_rbf = svr_multi.predict(X)
    # svr_rbf = SVR(kernel='rbf', C=10000)
    # y_rbf = svr_rbf.fit(X, Y).predict(X)
    # svr_poly = SVR(kernel='poly', C=1e3, degree=2)

    # Score (note: this scores the model against its own predictions, so it is
    # ~1.0 by construction; score against held-out Y for a meaningful number)
    print("SCORE=%.2f" % svr_multi.score(X, y_rbf))

    f, subplots = plt.subplots(2)
    # model.fit(X, Y)
    subplots[0].plot(Y[2])
    # subplots[1].plot(Y)
    Out = np.fft.ifft(Y[2]).real  # keep the real part; ifft returns complex values
    Out = Out * 100
    subplots[1].plot(Out)
    # mx = np.max(Out)
    mx = 32767
    audio = np.fromiter((s * mx for s in Out), dtype=np.int16)
    wavio.write('out.wav', audio, 44100)
    # plt.plot(X, y_rbf)
    # plt.plot(X, Y)
    np.savetxt("predsvm.csv", y_rbf, fmt='%.7f', delimiter=",")

    # Plot against freqs
    Fs = 44100
    samples = 512
    f = Fs * np.mgrid[0:512 / 2 + 1] / 512
    # plt.plot(Y)
    plt.show()
class HyperProcessModel:
    def __init__(self):
        self.in_shape = None
        self.shapes = []
        self.conditions = []
        self.b_params = []
        self.degree = -1
        self.optimization = False
        self.hyper_model = False

    # HPM
    def decomposition(self):
        """
        Performs the decomposition of all shapes using eigenvectors
        :return: all eigenvalues, all eigenvectors
        """
        print('Start Decomposing...')
        models = self.shapes.transpose()
        covariance = np.cov(models)
        eigenvalues, eigenvectors = np.linalg.eig(covariance)
        print('Decomposing complete!')
        return eigenvalues.real, eigenvectors.real

    # gives preference to max_variance
    def get_suitable_eigen(self, eigenvals, n_components=None, max_variance=0.95):
        """
        Selects the most suitable eigenvectors to use in the SSM. When both the
        number of components and the maximum variance are specified, preference
        is given to variance. Iterating through all eigenvectors, if the target
        variance is reached first, the corresponding eigenvectors are returned;
        otherwise, if the number of components is reached, those eigenvectors
        are returned.
        :param eigenvals: array with all eigenvalues
        :param n_components: int, default=None - number of components to be included
        :param max_variance: float, default=0.95 - variance to be reached
        :return: int - number of suitable components
        """
        sum_eigenvals = sum(eigenvals)
        variance = eigenvals / sum_eigenvals
        variance = [
            value if np.abs(value) > 0.00001 else 0 for value in variance
        ]
        cumulative_sum = 0
        for i in range(0, len(eigenvals)):
            cumulative_sum += variance[i]
            # if cumulative_sum >= max_variance and i > 0:
            if cumulative_sum >= max_variance:
                return i + 1
            if n_components is not None:
                if i + 1 == n_components:
                    return i + 1
        return len(eigenvals)

    def get_b_param(self, mean, shape, evec):
        """
        According to the SSM, the b parameters are the deformable parameters
        that allow the original shape to be reproduced from the decomposed
        shapes (eigenvectors) and the mean shape.
        :param mean: array (nsamples*nfeatures) - array with all values for the mean shape
        :param shape: array (nsamples*nfeatures) - shape used to calculate b parameters from the SSM
        :param evec: ndarray - eigenvectors to be used for the b parameter transformation
        :return: array - b parameters / deformable parameters for the corresponding shape
        """
        sub = (shape - mean)
        return np.dot(np.transpose(evec), np.transpose(sub))

    def get_in_shape(self):
        """
        Returns the input used to generate shapes for all process models
        :return: ndarray - (nsamples, nfeatures)
        """
        return self.in_shape

    def generate_shape(self, b):
        """
        Based on a deformable parameter (b), generates the corresponding shape
        :param b: array - set of deformable parameters
        :return: array (nsamples*nfeatures) - generated shape that needs to be reshaped as (nsamples, nfeatures)
        """
        return self.mean_shape + np.transpose(np.dot(self.eigenvectors, b))

    def set_pol_degree(self, degree):
        """
        Sets the polynomial degree for the hyper model
        :param degree: int
        :return: None
        """
        self.degree = degree

    def stochastic_factorial_design(self, granularity, n_samples, min, max):
        """
        Creates a combination of values, bounded to a minimum and maximum, for a
        "level" number of combinations
        :param granularity: int - level of detail (granularity) or steps in each feature. Linear space between min and max with "granularity" values.
        :param n_samples: int - number of samples to be produced
        :param min: array (nfeatures) - minimum value for all features
        :param max: array (nfeatures) - maximum value for all features
        :return: ndarray - all combinations
        """
        if granularity < 2:
            print('Granularity provided is less than 2')
        elif len(np.array(min)) == 1:
            self.in_shape = np.linspace(min, max, granularity).transpose()[0]
        else:
            array_temp = np.array([])
            factor = len(min)
            for i in range(factor):
                array_temp = np.append(
                    array_temp, np.linspace(min[i], max[i], granularity))
            matrix_temp = array_temp.reshape(factor, granularity)
            combo = matrix_temp

            my_sample = []
            while len(my_sample) < n_samples:
                # Choose one random item from each list; that forms an element
                elem = [
                    comp[random.randint(0, len(comp) - 1)] for comp in combo
                ]
                # Using a set eliminates duplicates easily
                my_sample.append(elem)

            result = np.matrix(my_sample)
            self.in_shape = result
        return self.in_shape

    def add_shape(self, shape):
        """
        Adds a shape to be used in the SSM. Conditions should be added in the
        same order as shapes.
        :param shape: array (nsamples*nfeatures) - shape to be added
        :return: None
        """
        # Sample 100 datapoints from the trained source models - produce the shapes
        if len(self.shapes) == 0:
            self.shapes = np.matrix(shape)
        else:
            self.shapes = np.vstack((self.shapes, shape))

    def add_condition(self, cond):
        """
        Adds a condition to be used by the hyper model. Shapes should be added
        in the same order as conditions.
        :param cond: array - conditions
        :return: None
        """
        if len(self.conditions) == 0:
            self.conditions = np.matrix(cond)
        else:
            self.conditions = np.vstack((self.conditions, cond))

    def get_mean_shape(self):
        """
        Calculates and returns the mean shape based on all previously added shapes.
        :return: array (nsamples*nfeatures) - mean shape for the SSM
        """
        self.mean_shape = np.mean(self.shapes, axis=0)
        return self.mean_shape

    def get_eigen(self, n_components, max_variance):
        """
        Calculates all eigenvectors and eigenvalues and returns only the most
        suitable ones. Meanwhile, the deformable parameters are calculated for
        all available shapes. This combines previously existing functions to
        automate the calculation process.
        :param n_components: int - number of components
        :param max_variance: float - variance to be reached
        :return: eigenvalues (array), eigenvectors (ndarray)
        """
        # eigenvectors
        self.eigenvalues, self.eigenvectors = self.decomposition()
        # suitable eigenvectors, not all of them
        modes_def = self.get_suitable_eigen(self.eigenvalues, n_components,
                                            max_variance)
        # filter suitable eigenvectors
        self.eigenvalues = self.eigenvalues[0:modes_def]
        self.eigenvectors = np.transpose(
            np.transpose(self.eigenvectors)[0:modes_def])

        ##########################################
        # Calculate b parameters
        for i in range(len(self.shapes)):
            if len(self.b_params) == 0:
                self.b_params = np.matrix(
                    self.get_b_param(self.mean_shape, self.shapes[i],
                                     self.eigenvectors)).transpose()
            else:
                self.b_params = np.vstack(
                    (self.b_params,
                     self.get_b_param(self.mean_shape, self.shapes[i],
                                      self.eigenvectors).transpose()))
        ##########################################
        return self.eigenvalues, self.eigenvectors

    def train_hyper_model(self, n_components=None, max_variance=0.95):
        """
        Trains the hyper model. The normal and most interesting scenario is when
        the number of conditions (c) is higher than the number of deformable
        parameters (b), so the hyper model can be trained as h: c -> b.
        However, it might be the case that b is higher than c, so ideally the
        model would be trained as h: b -> c. In that case, either 1) the inverse
        of h needs to be calculated, or 2) an optimization problem needs to be
        formulated to estimate b from a target c. To ease this latter case, a
        MultiOutputRegressor is used for the hyper model, meaning that one model
        per target is trained, so b being higher than c is no longer a problem.
        :param n_components: int - number of components
        :param max_variance: float - variance
        :return: float - score (R^2) of the trained model (not to be confused with an error metric)
        """
        self.get_mean_shape()
        self.get_eigen(n_components, max_variance)

        if self.degree == -1:
            print("Please define the degree of the Hyper Model first")
            exit()

        # NOTE: LinearRegression's `normalize` argument was removed in
        # scikit-learn 1.2; on newer versions, insert a StandardScaler step
        # into the pipeline instead.
        self.hyper_model = MultiOutputRegressor(
            make_pipeline(
                PolynomialFeatures(self.degree),
                linear_model.LinearRegression(fit_intercept=True,
                                              normalize=True)))

        print("Dimension of conditions:", self.conditions.shape[1])
        print("Dimension of b parameters:", self.b_params.shape[1])

        ##################################################################
        # Fit and score
        self.hyper_model.fit(self.conditions, self.b_params)
        score = self.hyper_model.score(self.conditions, self.b_params)
        return score

    def predict(self, new_cond):
        """
        Predicts the deformable parameters to be used in the SSM based on the
        new conditions provided.
        :param new_cond: array - new conditions
        :return: array - deformable parameters
        """
        return self.hyper_model.predict(new_cond)

    def get_new_shape(self, new_cond):
        """
        Based on the new conditions, returns the generated shape to be used for
        further training.
        :param new_cond: array - new conditions
        :return: array (nsamples*nfeatures) - newly generated shape
        """
        result_def = self.predict(new_cond)[0]
        new_gen_shape = self.generate_shape(result_def)
        new_gen_shape = np.array(new_gen_shape)[0]
        return new_gen_shape

    def set_hyper_model(self, model):
        """
        Use this function if a different estimator should replace the default
        polynomial hyper model.
        :param model: Predictor
        :return: None
        """
        self.hyper_model = model
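# --- Hedged usage sketch (not part of the original class): one way the API
# above could be driven, inferred from the docstrings. All shapes, conditions,
# the polynomial degree, and the query condition are made-up placeholder
# values; only the method names come from HyperProcessModel itself. Assumes a
# scikit-learn version compatible with the class's use of np.matrix and
# LinearRegression(normalize=...) (pre-1.0).
import numpy as np

hpm = HyperProcessModel()
hpm.set_pol_degree(2)  # the degree must be set before train_hyper_model()

# Add one flattened shape per source model, plus its process condition,
# in the same order (placeholder random data stands in for sampled responses).
rng = np.random.default_rng(0)
for cond in [(1.0, 2.0), (1.5, 2.5), (2.0, 3.0), (2.5, 3.5)]:
    hpm.add_shape(rng.normal(size=100))
    hpm.add_condition(cond)

score = hpm.train_hyper_model(max_variance=0.95)  # builds the SSM and fits h: c -> b
print("R^2:", score)

# Generate a shape for an unseen condition via the trained hyper model + SSM
new_shape = hpm.get_new_shape(np.array([[1.75, 2.75]]))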
import numpy as np
import pandas as pd

# Pre-processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures

# Regressors
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor

# Load data from file
TimeConstraint = pd.read_csv("../Data/time_constraint.csv")
le = LabelEncoder()
TimeConstraint.topology = le.fit_transform(TimeConstraint.topology)

x = TimeConstraint.throughput
y = TimeConstraint.drop(['throughput'], axis=1)
x = np.array(x).reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20,
                                                    random_state=0)

clf = MultiOutputRegressor(RandomForestRegressor(random_state=0))
clf.fit(x_train, y_train)  # fit on the training split only, so the test score is honest
print(clf.predict([[47817.84]]))
print(clf.score(x_test, y_test))
print(len(X.columns))
y = df.iloc[:, -33:]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# X_train, explainedVarianceRatio, X_test = principalComponent(X_train, X_test)

# rf1 = RandomForestRegressor(n_estimators=500, n_jobs=-1, random_state=42)
# rf1.fit(X, y.iloc[:, 0])

regressor = MultiOutputRegressor(RandomForestRegressor())
regressor.fit(X_train, y_train)
score = regressor.score(X_test, y_test)
print(score)

# feat_imp = pd.DataFrame({'importance': rf1.feature_importances_})
# feat_imp['Feature Index'] = X.columns
# feat_imp.sort_values(by='importance', ascending=True, inplace=True)
'''plt.barh(range(len(feat_imp)), feat_imp['importance'], color='b', align='center')
plt.yticks(range(len(feat_imp)), feat_imp['Feature Index'], size=6)
plt.xlabel('Feature importance')
plt.ylabel('Index')
plt.tight_layout()
plt.savefig('feat_imp1.jpg', format='jpg', dpi=500)'''
X_test, y_test = df_test.loc[:, [
    'voltage_min', 'cycle_count', 'soc', 'temperature_max'
]], df_test.loc[:, ['age', 'capacity']]

SVR_model = SVR()
model = MultiOutputRegressor(estimator=SVR_model)
print(model)

import time
start_time = time.time()
model.fit(X_train, y_train)

score = model.score(X_train, y_train)
# print("Training score:", score)

# Predict once for each split (the original computed these twice)
ypred_X_train = model.predict(X_train)
ypred_X_test = model.predict(X_test)

y_test = pd.DataFrame(y_test)
ypred_X_test = pd.DataFrame(ypred_X_test)
ypred_X_train = pd.DataFrame(ypred_X_train)
""" EFS ========== """ import numpy as np from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split from sklearn.multioutput import MultiOutputRegressor from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sparsereg.model.efs import EFS x, y = make_regression(n_samples=1000, n_features=10, n_informative=10, n_targets=3) x_train, x_test, y_train, y_test = train_test_split(x, y) steps = ("scaler", StandardScaler()), ("estimator", EFS(mu=1, q=3, max_stall_iter=5)) model = MultiOutputRegressor(Pipeline(list(steps))) model.fit(x_train, y_train) print(model.score(x_test, y_test))