コード例 #1
0
ファイル: unit test.py プロジェクト: xaviercallens/OneTeam
def train_model(X_df, y_array, skf_is):
    """Fit a feature extractor and a regressor for one train/test split.

    The feature extractor is fitted on, and applied to, the whole of
    ``X_df``. The regressor is then fitted only on the entries selected by
    the training indices of ``skf_is``.

    Parameters
    ----------
    X_df
        Input handed unchanged to ``FeatureExtractor.fit`` and ``transform``.
    y_array
        Targets; indexed with the same training indices as the transformed
        features.
    skf_is
        Two-element pair. The first element is the iterable of training
        indices; the second element is ignored here.

    Returns
    -------
    tuple
        ``(fe, reg)`` -- the fitted feature extractor and fitted regressor.
    """
    fe = FeatureExtractor()
    fe.fit(X_df, y_array)
    features = fe.transform(X_df)

    # Only the training half of the split is needed for fitting.
    train_is, _unused_is = skf_is

    def _gather(source):
        # Pick the training entries one index at a time into a new array.
        return np.array([source[idx] for idx in train_is])

    train_features = _gather(features)
    train_targets = _gather(y_array)

    reg = Regressor()
    reg.fit(train_features, train_targets)
    return fe, reg
コード例 #2
0
def train_model(X_df, y_array, skf_is):
    """Train the feature extractor / regressor pair on one split.

    Steps, in order:

    1. fit a ``FeatureExtractor`` on ``(X_df, y_array)`` and transform
       ``X_df`` with it;
    2. unpack ``skf_is`` into training indices and a second, unused element;
    3. collect the transformed entries and the targets at the training
       indices into NumPy arrays;
    4. fit a ``Regressor`` on those arrays.

    Returns the fitted ``(feature_extractor, regressor)`` tuple.
    """
    extractor = FeatureExtractor()
    extractor.fit(X_df, y_array)
    transformed = extractor.transform(X_df)

    # Regression: restrict both inputs to the training indices.
    train_is, _ = skf_is
    picked_rows = [transformed[k] for k in train_is]
    picked_targets = [y_array[k] for k in train_is]

    regressor = Regressor()
    regressor.fit(np.array(picked_rows), np.array(picked_targets))
    return extractor, regressor
コード例 #3
0
ファイル: tests.py プロジェクト: skwwt88/xgboost-from-scratch
    def test_split_node(self):
        """Fit a fresh ``Regressor`` on ``(x, y)`` and print its predictions.

        This is a smoke test: nothing is asserted, the predictions for
        ``x_valid`` are only written to standard output.
        """
        model = Regressor()
        model.fit(x, y)

        predictions = model.predict(x_valid)
        print(predictions)
コード例 #4
0
T0 = time()

print "load dataset..."
X_df_2011 = pd.DataFrame.from_csv("datasets/2011.csv")
X_df_2012 = pd.DataFrame.from_csv("datasets/2012.csv")
X_df_2013 = pd.DataFrame.from_csv("datasets/2013.csv")
X_df = pd.concat([X_df_2011, X_df_2012, X_df_2013], axis=0)

print "load dates..."
with open("target_dates_1.pkl") as f:
    dates = pickle.load(f)
    # date n1677, n3051 and n3451 cause trouble
    dates = dates.delete([1677, 3051, 3451])
sub = load_submission("data/submission.txt")
pred_dates = sub.index
fit_dates = load_all_data().index

fit_dates = fit_dates.delete(range(18024))  # hack
print "make the prediction..."
# make prediction
reg = Regressor()
reg.fit(fit_dates)
pred = reg.predict(pred_dates)
print "acquire the true value..."
target = X_df.loc[dates]
print "compute error..."
# get the error
err = get_error_dfs(pred, target)
print "LinExp error: ", err, "run in :", time() - T0, "s"
コード例 #5
0
Y_train=np.array(Y_train)
X_train=np.array(X_train)
X_test=np.array(X_test)


#### Creation of regressor 
reg=Regressor()


#### Cross validation
print "Cross validation ..."
#loo = cross_validation.LeaveOneOut(len(y_df))
loo=10
scores = cross_validation.cross_val_score(reg, X_train, Y_train, scoring='mean_squared_error', cv=loo,)
print "The score mean of cross validation : "
print scores.mean()

#### fit 
print "Fit ..."
reg.fit(X_train, Y_train)


#### Prediction
print "Prediction ..."
Y_pred = reg.predict(X_test)

#### write the submission
print "Write the submission ..."
make_submission(dataTest,Y_pred)

print "End."
コード例 #6
0
ファイル: main.py プロジェクト: BavoGoosens/Capita4
# Estimators that were tried for the "A" ensemble and are currently disabled:
#   linear_model.BayesianRidge(), linear_model.SGDRegressor(),
#   linear_model.Lasso(), linear_model.RANSACRegressor(),
#   RadiusNeighborsRegressor(radius=1.0), KNeighborsRegressor(n_neighbors=4)
regressorA4 = linear_model.LinearRegression()
regressorA5 = linear_model.PassiveAggressiveRegressor()

# Wrap the member estimators into the two meta-regressors.
regressorB = MetaRegressor([regressorB2])
regressorA = MetaRegressor([
    regressorA1,
    regressorA2,
    regressorA3,
    regressorA4,
    regressorA5,
])
baseRegressor = linear_model.LinearRegression()

# Combine both meta-regressors with the base regressor and train.
regressor = Regressor(regressorA, regressorB, baseRegressor)
regressor.fit(historic_data_set, target_data_set)

# Predict on the training data itself so the fitted models can be compared
# visually with the data they were trained on (useful when experimenting
# with different linear models).
(predict_base,
 predict_anomaly,
 predict_total,
 predict_dummy) = regressor.predict(historic_data_set)

plt.figure(1)
plt.subplot(311)
# Curves are drawn in this exact order; each one gets its own legend label.
for _curve, _curve_label in (
        (predict_total, "total"),
        (predict_base, "base"),
        (predict_anomaly, "anomaly"),
        (target_data_set, "target"),
        (predict_dummy, "dummy"),
):
    plt.plot(_curve, label=_curve_label)
plt.grid(True)
コード例 #7
0
from regressor import Regressor
from feature_extractor import FeatureExtractor

# Train/evaluate the model on a 50/50 split, then export the feature
# importances of the fitted tree ensemble to CSV.

# Separate the predictors from the 'target' column.
df_features = df.drop('target', axis=1)
y = df.target.values

df_train, df_test, y_train, y_test = train_test_split(
    df_features, y, test_size=0.5, random_state=42)


feature_extractor = FeatureExtractor()
model = Regressor()


# NOTE(review): the extractor is used without a prior fit() call --
# presumably it is stateless; confirm against FeatureExtractor.
X_train = feature_extractor.transform(df_train)
model.fit(X_train, y_train)

X_test = feature_extractor.transform(df_test)
y_pred = model.predict(X_test)
print('RMSE = ', np.sqrt(mean_squared_error(y_test, y_pred)))


imputer = model.clf.named_steps['imputer']

# Push the column positions 0..n-1 through the fitted imputer as ONE sample
# (explicit shape (1, n)); the positions that come back are the columns the
# imputer kept.  The explicit reshape is required by scikit-learn versions
# that reject 1-D input, and the builtin ``int`` replaces the ``np.int``
# alias, which was removed from NumPy.
valid_idx = imputer.transform(
    np.arange(df_train.shape[1]).reshape(1, -1)).astype(int)
et = model.clf.named_steps['extratreesregressor']

# valid_idx is 2-D with a single row, hence the trailing [0] below.
feature_importances = pd.DataFrame(data=et.feature_importances_,
                                   index=df_train.columns[valid_idx][0])
# Number of non-missing training values for each retained column.
feature_importances['counts'] = df_train.count()[valid_idx][0]
feature_importances.to_csv('feature_importance.csv')
コード例 #8
0
	print "Loading the X test ..."
	set_X_test=[]
	i=0
	while i < len(sub_data['cod_ASS_ASSIGNMENT'].unique()):
		set_X_test.append(sub_test[features][sub_test['cod_ASS_ASSIGNMENT' ]==(i)])
		i=i+1

	i=0
	listPred=[]
	score_cv_global=[]
	while i<len(set_X_train):
		scaler = pre.StandardScaler().fit(set_X_train[i][features_train])
		X_train_scaled = scaler.transform(set_X_train[i][features_train])
		print " Train et Predict the categorie : ",i
		reg=Regressor()
		reg.fit(X_train_scaled, set_Y_train[i])


		#### Cross validation
		#print "Cross validation ...", i
		#loo = cross_validation.LeaveOneOut(len(y_df))
		#loo=10
		#scores = cross_validation.cross_val_score(reg, X_train_scaled, set_Y_train[i], scoring='neg_mean_squared_error', cv=loo,)
		#print "The score mean of cross validation : ", scores.mean()
		#score_cv_global.append(scores.mean())

		if(len(set_X_test[i])>0):            
			X_test_scaled = scaler.transform(set_X_test[i][features_train])
			listPred.append( reg.predict(X_test_scaled))
		i=i+1