コード例 #1
0
ファイル: tests.py プロジェクト: skwwt88/xgboost-from-scratch
    def test_split_node(self):
        """Fit a fresh ``Regressor`` on ``x``/``y`` and print its predictions for ``x_valid``.

        This is a smoke run only: it makes no assertions, so it fails
        only if fitting or predicting raises.
        """
        model = Regressor()
        model.fit(x, y)

        predictions = model.predict(x_valid)
        print(predictions)
コード例 #2
0
       'Age_categories_Senior']

target_column = 'Survived'

# First fit of `r` on every training row.
r.train_machine(train[columns], train[target_column])

holdout = test

all_X = train[columns]
all_y = train[target_column]

# Hold-out evaluation: refit `r` on one part of the data and score it on the
# other. (`sp.split` is presumably a train/test splitter -- confirm.)
train_x, test_x, train_y, test_y = sp.split(train[columns], train[target_column])
# toPrint = sr.get_train()['Age'].describe();
# print(toPrint)
r.train_machine(train_x, train_y)
predictions = r.predict(test_x)
accuracy = mt.model_accuracy(test_y, predictions)

# Cross-validated score of the bare estimator over the full training data.
# (The trailing 10 is presumably the number of folds -- confirm against `mt`.)
regressor_object = Regressor()
reg = regressor_object.get_regressor()

mt.set_cross_score(reg, all_X, all_y, 10)
mt.sort_score()
scores = mt.get_scores()
cross_accurace = mt.get_mean()

# Final model: train on all rows, then predict the holdout set with THAT
# model. The previous code predicted with `r`, which at this point had last
# been fitted on the train split only, leaving `regressor_object_1` unused.
regressor_object_1 = Regressor()
regressor_object_1.train_machine(all_X, all_y)
prediction = regressor_object_1.predict(holdout[columns])
# back_x = train_x;
コード例 #3
0
# Work on plain NumPy arrays from here on.
Y_train = np.array(Y_train)
X_train = np.array(X_train)
X_test = np.array(X_test)


#### Creation of regressor
reg = Regressor()


#### Cross validation
# Every print below takes exactly one argument, so the call form produces the
# same output on Python 2 and Python 3 (the statement form only parses on 2).
print("Cross validation ...")
#loo = cross_validation.LeaveOneOut(len(y_df))
loo = 10
# NOTE(review): recent scikit-learn releases provide cross_val_score from
# sklearn.model_selection and name this scorer 'neg_mean_squared_error';
# confirm against the installed version before upgrading.
scores = cross_validation.cross_val_score(
    reg, X_train, Y_train, scoring='mean_squared_error', cv=loo)
print("The score mean of cross validation : ")
print(scores.mean())

#### fit
print("Fit ...")
reg.fit(X_train, Y_train)


#### Prediction
print("Prediction ...")
Y_pred = reg.predict(X_test)

#### write the submission
print("Write the submission ...")
make_submission(dataTest, Y_pred)

print("End.")
コード例 #4
0
# Wall-clock start, used for the run-time report on the last line.
T0 = time()

print("load dataset...")
# DataFrame.from_csv no longer exists in current pandas; read_csv with
# index_col=0 and parse_dates=True is its documented replacement.
X_df_2011 = pd.read_csv("datasets/2011.csv", index_col=0, parse_dates=True)
X_df_2012 = pd.read_csv("datasets/2012.csv", index_col=0, parse_dates=True)
X_df_2013 = pd.read_csv("datasets/2013.csv", index_col=0, parse_dates=True)
X_df = pd.concat([X_df_2011, X_df_2012, X_df_2013], axis=0)

print("load dates...")
# Pickles are binary data, so the file must be opened with "rb".
# NOTE(review): pickle.load can execute arbitrary code; only load files
# produced by this project.
with open("target_dates_1.pkl", "rb") as f:
    dates = pickle.load(f)
    # date n1677, n3051 and n3451 cause trouble
    dates = dates.delete([1677, 3051, 3451])
sub = load_submission("data/submission.txt")
pred_dates = sub.index
fit_dates = load_all_data().index

# hack: drop the first 18024 positions of the fit index
fit_dates = fit_dates.delete(range(18024))
print("make the prediction...")
# make prediction
reg = Regressor()
reg.fit(fit_dates)
pred = reg.predict(pred_dates)
print("acquire the true value...")
target = X_df.loc[dates]
print("compute error...")
# get the error
err = get_error_dfs(pred, target)
# Join the pieces by hand so the line reads the same as the old
# space-separated print statement on both Python 2 and Python 3.
print(" ".join(str(part) for part in (
    "LinExp error: ", err, "run in :", time() - T0, "s")))
コード例 #5
0
    # Visualizing the results
    visualizer = Visualizer()
    visualizer.plot_classifier_regressor(y_test, y_predicted,
                                         method_identifier)

    print('The accuracy is: ' + str(classifier_accuracy) + ' %')
    print(algorithm_name)

# ---------------------Applying Regression to the data--------------------------
elif method_identifier == 2:
    # Regression path: build a Regressor for the chosen algorithm, predict the
    # test targets, score the result, plot it and print the score.

    # Imported here so the module is only loaded when this branch runs.
    from regressor import Regressor

    regressor = Regressor(algorithm_name)
    # predict() is handed the training data as well as X_test; presumably it
    # fits and predicts in a single call -- confirm in regressor.py.
    y_predicted = regressor.predict(X_train, y_train, X_test)
    regressor_score = regressor.get_score(y_test, y_predicted)

    # Visualizing the results
    visualizer = Visualizer()
    visualizer.plot_classifier_regressor(y_test, y_predicted,
                                         method_identifier)

    print('The coefficient of determination is: ' + str(regressor_score))
    print(algorithm_name)

# ---------------------Clustering the data------------------------------------
elif method_identifier == 3:

    from clustering import Clustering
コード例 #6
0
min_max_scaler = MinMaxScaler()
df = pd.read_csv("market-price-2014.csv")
# Remove the first column in place. The old line bound the result to
# `df_norm`, but drop(..., inplace=True) returns None, so that name never
# held a frame. `axis` is passed by keyword because current pandas no longer
# accepts it positionally.
df.drop(df.columns[0], axis=1, inplace=True)
data_splitter = DataSplitter(df)
df_train, df_validate, df_test = data_splitter.train_validate_test_split()

# Build (inputs, targets) for the training and validation partitions.
# NOTE(review): 30 and 5 are presumably the input window length and the
# prediction horizon -- confirm in DataSplitter.get_XY_sets.
data_splitter = DataSplitter(df_train)
x_train, y_train = data_splitter.get_XY_sets(min_max_scaler, 30, 5)

data_splitter = DataSplitter(df_validate)
x_validate, y_validate = data_splitter.get_XY_sets(min_max_scaler, 30, 5)

# `regressor` is whatever train() returns; it is used for predict() below.
regressor = Regressor(x_train, y_train, x_validate, y_validate).train()

# PREDICT PRICE
test_set = df_test.values
data_splitter = DataSplitter(df_test)
inputs, outputs = data_splitter.get_XY_sets(min_max_scaler, 30, 5)
predicted_price = regressor.predict(inputs)

# RMSE between the flattened targets and the flattened predictions.
x = np.array(outputs).ravel()
y = np.array(predicted_price).ravel()
rmse = sqrt(mean_squared_error(x, y))
print('RMSE: %.3f' % rmse)

# Undo the scaling for the second-to-last prediction and plot it against
# the matching slice of the raw test values.
predicted_price = min_max_scaler.inverse_transform(
    np.array(predicted_price[-2]).reshape(-1, 1)).tolist()
plotter = Plotter(test_set[-10:-5], predicted_price)
plotter.plot()
コード例 #7
0
# print(test.columns)
# print(train.columns)

# Feature columns; the train and test frames use the same set.
train_columns = [
    'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
    'Embarked_Q', 'Embarked_S', 'Embarked_missing_data',
]
test_columns = list(train_columns)

# Scale both frames and write the scaled values back into their columns.
scaled_train, scaled_test = processor_ms.scale_fit_train_test(
    train[train_columns], test[test_columns])
train[train_columns] = scaled_train
test[test_columns] = scaled_test

# Regressor: fit on the training frame, predict the test frame and cast the
# predictions to int.
regressor_object_1 = Regressor()
regressor_object_1.train_machine(train[train_columns], train['Survived'])
prediction = regressor_object_1.predict(test[train_columns]).astype(int)
print(prediction)

# ---- Submit answer ----
holdout_ids = test["PassengerId"]
sub_df = {"PassengerId": holdout_ids, "Survived": prediction}

ds = Data_Set(sub_df)
ds.to_csv("normalised_submission_test_fitted")
コード例 #8
0
# Backward elimination: refit OLS repeatedly, each time dropping the feature
# with the largest p-value, until no p-value is above 0.05.
max_p_value = 1
non_significant_index = -1  # -1 means no column has been flagged yet
eliminator = None
while max_p_value > 0.05:
    # Remove the column flagged by the previous pass from both matrices so
    # train and test keep the same feature layout.
    if non_significant_index != -1:
        x_train = np.delete(x_train, non_significant_index, 1)
        x_test = np.delete(x_test, non_significant_index, 1)
    eliminator = Back_Elimination()
    eliminator.fit_OLS(y_train, x_train)
    p_values = eliminator.get_p_values()
    max_p_value = np.amax(p_values)
    non_significant_index = list(p_values).index(max_p_value)

# LOGISTIC REGRESSION
regressor = Regressor()
regressor.train_machine(x_train, y_train)
prediction = regressor.predict(x_test)
print(prediction)

# #################
# SUBMIT ANSWER
# #################
# print(test.columns);
holdout_ids = df_test['Id']
sub_df = {
    "Id": holdout_ids,
    "Cover_Type": prediction
}

ds = Data_Set(sub_df)
# `False`, not `false`: the lowercase name is undefined and raised NameError.
ds.to_csv("submission", index=False)
コード例 #9
0
ファイル: main.py プロジェクト: BavoGoosens/Capita4
# regressorA = linear_model.RANSACRegressor()
# regressorA = RadiusNeighborsRegressor(radius=1.0)
# regressorA = KNeighborsRegressor(n_neighbors=4)

# Two meta-regressors and a plain linear baseline feed the combined model.
regressorB = MetaRegressor([regressorB2])
regressorA = MetaRegressor([
    regressorA1,
    regressorA2,
    regressorA3,
    regressorA4,
    regressorA5,
])
baseRegressor = linear_model.LinearRegression()

regressor = Regressor(regressorA, regressorB, baseRegressor)
regressor.fit(historic_data_set, target_data_set)

# Predict on the training data itself so the fitted components can be
# compared visually with the target they were trained on.
training_predictions = regressor.predict(historic_data_set)
predict_base, predict_anomaly, predict_total, predict_dummy = training_predictions

plt.figure(1)
plt.subplot(311)
# Draw the curves in a fixed order; the legend lists them in this order.
for curve, curve_label in (
        (predict_total, "total"),
        (predict_base, "base"),
        (predict_anomaly, "anomaly"),
        (target_data_set, "target"),
        (predict_dummy, "dummy")):
    plt.plot(curve, label=curve_label)
plt.grid(True)
plt.legend()

# plot the predicted values (by the model) against the actual prices for that week
# it is this prediction that we'll feed to the scheduler

#exp
コード例 #10
0
# Separate the features from the 'target' column.
df_features = df.drop('target', axis=1)
y = df.target.values

# 50/50 split with a fixed seed so runs are repeatable.
df_train, df_test, y_train, y_test = train_test_split(
    df_features, y, test_size=0.5, random_state=42)


feature_extractor = FeatureExtractor()
model = Regressor()


X_train = feature_extractor.transform(df_train)
model.fit(X_train, y_train)

X_test = feature_extractor.transform(df_test)
y_pred = model.predict(X_test)
print('RMSE = ', np.sqrt(mean_squared_error(y_test, y_pred)))


imputer = model.clf.named_steps['imputer']

# Push the column positions through the fitted imputer; presumably this
# yields the positions of the columns it keeps -- confirm. The cast uses the
# builtin `int`: `np.int` was only an alias for it and has been removed from
# NumPy.
valid_idx = imputer.transform(np.arange(df_train.shape[1])).astype(int)
et = model.clf.named_steps['extratreesregressor']

# One row per retained feature: its importance plus its non-null count in
# the training frame.
feature_importances = pd.DataFrame(data=et.feature_importances_,
                                   index=df_train.columns[valid_idx][0])
feature_importances['counts'] = df_train.count()[valid_idx][0]
feature_importances.to_csv('feature_importance.csv')


コード例 #11
0
		print " Train et Predict the categorie : ",i
		reg=Regressor()
		reg.fit(X_train_scaled, set_Y_train[i])


		#### Cross validation
		#print "Cross validation ...", i
		#loo = cross_validation.LeaveOneOut(len(y_df))
		#loo=10
		#scores = cross_validation.cross_val_score(reg, X_train_scaled, set_Y_train[i], scoring='neg_mean_squared_error', cv=loo,)
		#print "The score mean of cross validation : ", scores.mean()
		#score_cv_global.append(scores.mean())

		if(len(set_X_test[i])>0):            
			X_test_scaled = scaler.transform(set_X_test[i][features_train])
			listPred.append( reg.predict(X_test_scaled))
		i=i+1


	# Write each stored prediction back into its test frame. listPred only has
	# entries for the non-empty frames, so it gets its own cursor `i`, which
	# advances only when a frame is filled, while `l` walks every frame.
	# (The loop body was previously not indented under the `while`, with mixed
	# tabs and spaces, so it could not parse.)
	l = 0
	i = 0
	while l < len(set_X_test):
		if len(set_X_test[l]) > 0:
			set_X_test[l]['CSPL_RECEIVED_CALLS'] = listPred[i]
			i = i + 1
		l = l + 1



	# reassemble the prediction values into a single frame
	resultPred = pd.concat(set_X_test)