def train_model(X_df, y_array, skf_is):
    """Fit the feature extractor and the regressor on one CV fold.

    Parameters
    ----------
    X_df : pandas.DataFrame
        Raw input features for the full dataset.
    y_array : sequence
        Targets aligned with ``X_df`` rows.
    skf_is : tuple
        A ``(train_indices, test_indices)`` pair as produced by a
        scikit-learn CV splitter; only the train indices are used.

    Returns
    -------
    (fe, reg)
        The fitted ``FeatureExtractor`` and ``Regressor``.

    NOTE: the original file defined this function twice, back to back and
    byte-identically; the duplicate definition has been removed.
    """
    fe = FeatureExtractor()
    fe.fit(X_df, y_array)
    X_array = fe.transform(X_df)
    # Regression: keep only this fold's training rows.
    train_is, _ = skf_is
    X_train_array = np.array([X_array[i] for i in train_is])
    y_train_array = np.array([y_array[i] for i in train_is])
    reg = Regressor()
    reg.fit(X_train_array, y_train_array)
    return fe, reg
def test_split_node(self):
    """Smoke-test the Regressor: fit on the training pair, print predictions
    for the validation inputs (``x``, ``y`` and ``x_valid`` come from the
    enclosing scope)."""
    model = Regressor()
    model.fit(x, y)
    predictions = model.predict(x_valid)
    print(predictions)
# Driver script: load three years of data, predict on the submission dates
# and report the LinExp error together with the wall-clock run time.
T0 = time()

print("load dataset...")
# pd.DataFrame.from_csv was removed in pandas 1.0; read_csv(index_col=0)
# is the documented drop-in replacement.
X_df_2011 = pd.read_csv("datasets/2011.csv", index_col=0)
X_df_2012 = pd.read_csv("datasets/2012.csv", index_col=0)
X_df_2013 = pd.read_csv("datasets/2013.csv", index_col=0)
X_df = pd.concat([X_df_2011, X_df_2012, X_df_2013], axis=0)

print("load dates...")
# Pickle files must be opened in binary mode (text mode breaks on Python 3).
with open("target_dates_1.pkl", "rb") as f:
    dates = pickle.load(f)
# date n1677, n3051 and n3451 cause trouble
dates = dates.delete([1677, 3051, 3451])

sub = load_submission("data/submission.txt")
pred_dates = sub.index
fit_dates = load_all_data().index
fit_dates = fit_dates.delete(range(18024))  # hack

print("make the prediction...")
# make prediction
reg = Regressor()
reg.fit(fit_dates)
pred = reg.predict(pred_dates)

print("acquire the true value...")
target = X_df.loc[dates]

print("compute error...")
# get the error
err = get_error_dfs(pred, target)
print("LinExp error: ", err, "run in :", time() - T0, "s")
# Driver script: cross-validate, fit and predict with the project Regressor,
# then write the submission file.
Y_train = np.array(Y_train)
X_train = np.array(X_train)
X_test = np.array(X_test)

#### Creation of regressor
reg = Regressor()

#### Cross validation
print("Cross validation ...")
# loo = cross_validation.LeaveOneOut(len(y_df))
loo = 10
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# the import (outside this chunk) should come from sklearn.model_selection.
# The scoring name 'mean_squared_error' was likewise removed — the current
# name, already used elsewhere in this project, is 'neg_mean_squared_error'
# (scores are negated: higher is better).
scores = cross_validation.cross_val_score(
    reg, X_train, Y_train, scoring='neg_mean_squared_error', cv=loo)
print("The score mean of cross validation : ")
print(scores.mean())

#### fit
print("Fit ...")
reg.fit(X_train, Y_train)

#### Prediction
print("Prediction ...")
Y_pred = reg.predict(X_test)

#### write the submission
print("Write the submission ...")
make_submission(dataTest, Y_pred)
print("End.")
# regressorA = linear_model.BayesianRidge() regressorA4 = linear_model.LinearRegression() regressorA5 = linear_model.PassiveAggressiveRegressor() # regressorA = linear_model.SGDRegressor() # regressorA = linear_model.Lasso() # regressorA = linear_model.RANSACRegressor() # regressorA = RadiusNeighborsRegressor(radius=1.0) # regressorA = KNeighborsRegressor(n_neighbors=4) regressorB = MetaRegressor([regressorB2]) regressorA = MetaRegressor( [regressorA1, regressorA2, regressorA3, regressorA4, regressorA5]) baseRegressor = linear_model.LinearRegression() regressor = Regressor(regressorA, regressorB, baseRegressor) regressor.fit(historic_data_set, target_data_set) # plot the trained models against the data they were trained on # together with least squares measures(in order to experiment with diff linear models) predict_base, predict_anomaly, predict_total, predict_dummy = regressor.predict( historic_data_set) plt.figure(1) plt.subplot(311) plt.plot(predict_total, label="total") plt.plot(predict_base, label="base") plt.plot(predict_anomaly, label="anomaly") plt.plot(target_data_set, label="target") plt.plot(predict_dummy, label="dummy") plt.grid(True)
from regressor import Regressor
from feature_extractor import FeatureExtractor

# Driver script: 50/50 train/test split, report RMSE, then export the
# ExtraTrees feature importances mapped back to the surviving column names.
df_features = df.drop('target', axis=1)
y = df.target.values
df_train, df_test, y_train, y_test = train_test_split(
    df_features, y, test_size=0.5, random_state=42)

feature_extractor = FeatureExtractor()
model = Regressor()
# NOTE(review): the extractor is used without a fit() call — assumes
# FeatureExtractor.transform is stateless; confirm (train_model elsewhere
# in this file fits it first).
X_train = feature_extractor.transform(df_train)
model.fit(X_train, y_train)
X_test = feature_extractor.transform(df_test)
y_pred = model.predict(X_test)
print('RMSE = ', np.sqrt(mean_squared_error(y_test, y_pred)))

# Push the column indices through the pipeline's imputer to recover which
# input columns survived, so importances can be labelled with column names.
imputer = model.clf.named_steps['imputer']
# np.int was removed in NumPy 1.24; the builtin int is the exact replacement.
valid_idx = imputer.transform(np.arange(df_train.shape[1])).astype(int)
et = model.clf.named_steps['extratreesregressor']
feature_importances = pd.DataFrame(data=et.feature_importances_,
                                   index=df_train.columns[valid_idx][0])
feature_importances['counts'] = df_train.count()[valid_idx][0]
feature_importances.to_csv('feature_importance.csv')
# Driver script: build one test-set slice per assignment category, then
# scale / fit / predict each category independently.
print("Loading the X test ...")
set_X_test = []
# NOTE(review): the category column is compared to the loop index, which
# assumes 'cod_ASS_ASSIGNMENT' holds integer codes 0..n-1 — confirm; if it
# holds the raw labels this selects empty slices.
n_categories = len(sub_data['cod_ASS_ASSIGNMENT'].unique())
for cat in range(n_categories):
    set_X_test.append(
        sub_test[features][sub_test['cod_ASS_ASSIGNMENT'] == cat])

listPred = []
score_cv_global = []
for i in range(len(set_X_train)):
    # Each category gets its own scaler, fitted on that category's rows only.
    scaler = pre.StandardScaler().fit(set_X_train[i][features_train])
    X_train_scaled = scaler.transform(set_X_train[i][features_train])
    print(" Train et Predict the categorie : ", i)
    reg = Regressor()
    reg.fit(X_train_scaled, set_Y_train[i])
    #### Cross validation (kept for reference)
    # print("Cross validation ...", i)
    # loo = cross_validation.LeaveOneOut(len(y_df))
    # loo = 10
    # scores = cross_validation.cross_val_score(reg, X_train_scaled, set_Y_train[i], scoring='neg_mean_squared_error', cv=loo,)
    # print("The score mean of cross validation : ", scores.mean())
    # score_cv_global.append(scores.mean())
    if len(set_X_test[i]) > 0:
        # Reuse the same per-category scaler on the matching test slice.
        X_test_scaled = scaler.transform(set_X_test[i][features_train])
        listPred.append(reg.predict(X_test_scaled))