def fit_model(X, Y):
    # Function returning fits of the different models (linear, exp, log, and k-NN).
    X = [[x] for x in X]
    regr = linear_model.LinearRegression().fit(X, Y)
    regr_ey = linear_model.LinearRegression().fit(np.exp(X), Y)
    regr_log = linear_model.LinearRegression().fit(np.log(X), Y)
    nb1NN = KNR(n_neighbors=1, algorithm='ball_tree').fit(X, Y)
    nb2NN = KNR(n_neighbors=2, algorithm='ball_tree').fit(X, Y)
    nb3NN = KNR(n_neighbors=3, algorithm='ball_tree').fit(X, Y)
    return regr, regr_ey, regr_log, nb1NN, nb2NN, nb3NN
def fit_model(X, Y):
    #### function for fitting the models for an individual ####
    X = [[x] for x in X]
    regr = linear_model.LinearRegression().fit(X, Y)
    regr_ey = linear_model.LinearRegression().fit(np.exp(X), Y)
    regr_log = linear_model.LinearRegression().fit(np.log(X), Y)
    nb1NN = KNR(n_neighbors=1, algorithm='ball_tree').fit(X, Y)
    nb2NN = KNR(n_neighbors=2, algorithm='ball_tree').fit(X, Y)
    nb3NN = KNR(n_neighbors=3, algorithm='ball_tree').fit(X, Y)
    return [regr, regr_ey, regr_log, nb1NN, nb2NN, nb3NN]
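# A minimal usage sketch for fit_model; the toy data and imports below are
# illustrative assumptions, not part of the original snippets.
import numpy as np
from sklearn import linear_model
from sklearn.neighbors import KNeighborsRegressor as KNR

X = [1.0, 2.0, 3.0, 4.0, 5.0]
Y = [1.1, 1.9, 3.2, 3.8, 5.1]
regr, regr_ey, regr_log, nb1NN, nb2NN, nb3NN = fit_model(X, Y)
print(regr.predict([[2.5]]))              # linear model takes raw x
print(regr_ey.predict(np.exp([[2.5]])))   # exp model was fit on exp(X)
print(regr_log.predict(np.log([[2.5]])))  # log model was fit on log(X)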
def cross_validation(X, Y):
    # X - the data['stimulus'] list
    # Y - the data['converted'] list
    from sklearn import cross_validation  # local import so the module name is not shadowed by this function
    resultsX = []
    resultsId = []
    resultsRegr = []
    resultsRegr_ey = []
    resultsRegr_log = []
    results1NN, results2NN, results3NN = [], [], []
    resultsY = []
    N = len(X)  # number of samples
    kf = cross_validation.LeaveOneOut(N)  # LeaveOneOut == KFold(n, n_folds=n)
    for train_index, test_index in kf:
        X_train, Y_train = X[train_index], Y[train_index]
        X_test, Y_test = X[test_index], Y[test_index]
        resultsX.append(X_test.values[0])
        resultsId.append(Y_test.values[0])
        resultsY.append(Y_test.values[0])
        X_train_list = [[x] for x in X_train]
        regr = linear_model.LinearRegression().fit(X_train_list, Y_train)
        resultsRegr += [[float(regr.predict(x)) for x in X_test]]
        regr_ey = linear_model.LinearRegression().fit(np.exp(X_train_list), Y_train)
        resultsRegr_ey += [[float(regr_ey.predict(np.exp(x))) for x in X_test]]
        regr_log = linear_model.LinearRegression().fit(np.log(X_train_list), Y_train)
        resultsRegr_log += [[float(regr_log.predict(np.log(x))) for x in X_test]]
        nb1NN = KNR(n_neighbors=1, algorithm='ball_tree').fit(X_train_list, Y_train)
        results1NN += [[float(nb1NN.predict(x)[0]) for x in X_test]]
        nb2NN = KNR(n_neighbors=2, algorithm='ball_tree').fit(X_train_list, Y_train)
        results2NN += [[float(nb2NN.predict(x)[0]) for x in X_test]]
        nb3NN = KNR(n_neighbors=3, algorithm='ball_tree').fit(X_train_list, Y_train)
        results3NN += [[float(nb3NN.predict(x)[0]) for x in X_test]]
    return (resultsX, resultsId, resultsY, resultsRegr, resultsRegr_ey,
            resultsRegr_log, results1NN, results2NN, results3NN)
def model(X_train, y_train, X_test=np.array([]), y_test=np.array([]), method="LR"):
    # X_train: model inputs for training
    # X_test: model inputs for testing
    # y_train: outputs for X_train
    # y_test: outputs for X_test
    # method: model type; the default is linear regression
    if method == "LR":
        lr = LR()
    elif method == "Ridge":
        lr = Ridge()
    elif method == "Lasso":
        lr = Lasso()
    elif method == "MLPRegressor":
        lr = MLPRegressor()
    elif method == "SVR":
        lr = SVR()
    elif method == "KNR":
        lr = KNR()
    elif method == "RFR":
        lr = RFR()
    elif method == "GBR":
        lr = GBR()
    else:
        print("unknown method")
        return False
    # Alternative configurations tried during development:
    # lr = MLPRegressor(hidden_layer_sizes=[5], activation="relu")
    # lr = Ridge(alpha=alpha.x)
    # lr = Lasso(alpha=0.001)
    # lr = RFR(n_estimators=5, max_features=2, max_depth=2, random_state=2)
    lr = lr.fit(X_train, y_train[:, 0])
    y_mod_train = lr.predict(X_train)
    c_train = CCC(y_train, y_mod_train[:, np.newaxis])
    c_test = -1
    if len(y_test) > 0:
        y_mod_test = lr.predict(X_test)
        c_test = CCC(y_test, y_mod_test[:, np.newaxis])
    return (lr, c_train, c_test)
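# A hypothetical usage sketch for model(): toy data plus a stand-in CCC helper
# (concordance correlation coefficient), since the original CCC is not shown.
# Everything below is an illustrative assumption, not the original author's code.
import numpy as np
from sklearn.neighbors import KNeighborsRegressor as KNR


def CCC(y_true, y_pred):
    # Lin's concordance correlation coefficient (population form).
    y_true, y_pred = y_true.ravel(), y_pred.ravel()
    cov = np.cov(y_true, y_pred, bias=True)[0, 1]
    return 2 * cov / (y_true.var() + y_pred.var() + (y_true.mean() - y_pred.mean()) ** 2)


rng = np.random.RandomState(0)
X_all = rng.rand(100, 3)
y_all = (X_all @ np.array([1.0, 2.0, 3.0]))[:, np.newaxis]  # 2-D, matching y_train[:, 0]
lr, c_train, c_test = model(X_all[:80], y_all[:80], X_all[80:], y_all[80:], method="KNR")
print(c_train, c_test)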
class Algorithms(Enum):
    RandomForestRegressor = RFR()
    MLPRegressor = MLPR()
    KNeighborsRegressor = KNR()
    Ridge = RR()
    Lasso = LR()  # LR is presumably the Lasso alias here; elsewhere in this collection LR means LinearRegression

    def __str__(self):
        return self.name
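# A sketch of one way the Algorithms enum might be consumed; the toy data and
# the clone-based loop are illustrative assumptions.
import numpy as np
from sklearn.base import clone

rng = np.random.RandomState(0)
X = rng.rand(40, 2)
y = X @ np.array([1.5, -0.5])
for algo in Algorithms:
    est = clone(algo.value)  # fresh, unfitted copy of the stored estimator
    est.fit(X, y)
    print(algo, est.score(X, y))  # __str__ prints the member name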
def main():
    merged = pd.read_csv('./data/merge_copy.csv')
    # exclude outliers
    merged = merged[merged['logerror'] < merged['logerror'].quantile(0.995)]
    merged = merged[merged['logerror'] > merged['logerror'].quantile(0.005)]
    merged.drop([
        'garagecarcnt', 'heatingorsystemtypeid', 'propertylandusetypeid',
        'regionidcity', u'taxdelinquencyflag', u'taxdelinquencyyear'
    ], axis=1, inplace=True)
    logerror = merged['logerror']
    X_train, X_test, y_train, y_test = train_test_split(
        merged.drop(['logerror', 'parcelid'], axis=1),
        logerror, test_size=0.2, random_state=42)
    sc = StandardScaler()
    sc_train = sc.fit_transform(X_train)
    sc_test = sc.transform(X_test)

    # svr = svm.LinearSVR(tol=0.000001, max_iter=50000)
    # svr_param_grid = {
    #     'C': [0.005, 0.01, 0.05, 0.1],
    #     # 'loss': ('epsilon_insensitive', 'squared_epsilon_insensitive')
    # }
    # svr_gcv = GridSearchCV(svr, svr_param_grid,
    #                        scoring=make_scorer(mean_absolute_error, greater_is_better=False),
    #                        n_jobs=6)
    # svr_gcv.fit(sc.fit_transform(merged.drop(['logerror', 'parcelid'], axis=1)), logerror)
    # print svr_gcv.best_estimator_
    # print svr_gcv.best_score_

    knr = KNR(n_jobs=4)
    knr_param_grid = {
        'n_neighbors': [1500, 1800, 2500],
        'leaf_size': [100, 200, 300],
        'algorithm': ('ball_tree', 'kd_tree', 'brute'),
    }
    knr_gcv = GridSearchCV(knr, knr_param_grid,
                           scoring=make_scorer(mean_absolute_error, greater_is_better=False))
    knr_gcv.fit(merged.drop(['logerror', 'parcelid'], axis=1), logerror)
    knr_best = knr_gcv.best_estimator_
    print knr_gcv.best_score_
    print "For unscaled features, mean_absolute_error is ", mean_absolute_error(
        y_test, knr_best.predict(X_test))
    print knr_best
def create_growth_rate_mapping(lim_data, Z, **kwargs):
    """Builds a model which maps gene expression to growth rate.

    Includes no growth points outside the data."""
    # weights are not calculated here; that should be done above this level
    n_neighbors = kwargs.get('n_neighbors', 8)
    allgrdat = lim_data
    allgr = Z
    clf = KNR(n_neighbors=n_neighbors, weights='distance')  # keyword args: 'distance' is the weights parameter
    clf.fit(allgrdat.values, allgr.values)
    return clf
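# A sketch of how create_growth_rate_mapping might be called, assuming lim_data
# is a samples-by-genes DataFrame and Z a matching Series of growth rates
# (both made up here for illustration).
import numpy as np
import pandas as pd

lim_data = pd.DataFrame(np.random.rand(20, 5),
                        columns=['g1', 'g2', 'g3', 'g4', 'g5'])
Z = pd.Series(np.random.rand(20))
clf = create_growth_rate_mapping(lim_data, Z, n_neighbors=3)
print(clf.predict(lim_data.values[:2]))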
def predict(k, X, Y, x, stock_symbols):
    # Creates a k-NN regressor, fits it to the training and target data, and
    # then predicts the next day's EODs.
    predictions = {}
    clf = KNR(n_neighbors=k)  # maybe activate weights='distance'
    # clf = MLPR(hidden_layer_sizes=(8, 4), max_iter=10000)
    if len(X) > 0 and len(X) == len(Y):
        clf.fit(X, Y)
        X_width = len(X[0])
        i = 0
        for stock_symbol in stock_symbols:
            i += 1
            if stock_symbol in x:
                print("Predicting: %20s %7.2f %% \r"
                      % (stock_symbol, 100 * i / len(stock_symbols)), end="")
                if len(x[stock_symbol]) == X_width:
                    predictions[stock_symbol] = clf.predict([x[stock_symbol]])[0]
        print()
    return predictions
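# Illustrative call to predict() with made-up per-symbol feature windows; the
# function above and its KNR import are assumed to be in scope.
X = [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]]       # training windows
Y = [3.0, 4.0, 5.0]                            # next-day EOD targets
x = {"AAPL": [4.0, 5.0], "MSFT": [5.0, 6.0]}   # today's windows per symbol
print(predict(2, X, Y, x, ["AAPL", "MSFT"]))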
def train_model():
    data = get_data()
    data['co'] = data.state == 'colorado'
    data['fl'] = data.state == 'florida'
    data['nj'] = data.state == 'new jersey'
    data['ca'] = data.state == 'california'
    print(data.shape)
    X_train, X_test, y_train, y_test = split_data(data)
    X_train, y_train = remove_county_state(X_train, y_train)
    X_test, y_test = remove_county_state(X_test, y_test)
    # data preprocessing (removing the mean and scaling to unit variance with StandardScaler)
    pipeline = make_pipeline(StandardScaler(), KNR())
    # set hyperparameters
    hyperparameters = {
        'kneighborsregressor__n_neighbors': [100, 50, 20, 15, 10, 5, 3, 2],
        'kneighborsregressor__weights': ['uniform', 'distance'],
    }
    # tune model via pipeline
    clf = GridSearchCV(pipeline, hyperparameters, cv=3)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # print('feature importances:', clf.feature_importances_)
    print('r2 score:', r2_score(y_test, pred))
    print('mse:', mean_squared_error(y_test, pred))
    print('*' * 20)
    print('best params:', clf.best_params_)
    print('best grid:', clf.best_estimator_)
    print('^' * 20)
    eval_model(clf.best_estimator_, X_train, y_train, X_test, y_test)
    print('#' * 20)
    print('score', clf.score(X_test, y_test))  # call the method to get the value
    return clf
# support vector regression
svr = SVR(kernel='poly', degree=3, C=1)
# training
svr.fit(X_train, y_train)
# testing
y_pred_svr = svr.predict(X_test)
# r_square
r_2_svr = r2_score(y_test, y_pred_svr)

# KNeighbors Regression
knr = KNR(n_neighbors=4, weights='distance', p=4)
# training
knr.fit(X_train, y_train)
# testing
y_pred_knr = knr.predict(X_test)
# r_square
r_2_knr = r2_score(y_test, y_pred_knr)

# Random Forest Regression
rfr = RFR(n_estimators=100, max_features='auto', random_state=1)
j += 1
dicts.append(dic)
i += 1
# ......
# ......
"""
# now get your testing error, store it for different K, and prepare to plot a figure
terr = []
for k in ks:
    for dict in dict
"""
terr = []
ks = [1, 3, 5, 7, 9]
for k in ks:
    knn = KNR(n_neighbors=k)
    knn.fit(sample_train, label_train)
    pred = knn.predict(sample_test)
    mse = ((pred - label_test) ** 2).sum() / len(pred)
    terr.append(mse)
plt.plot(ks, terr, label="test MSE")
plt.title("KNN Testing error")
plt.xlabel("K")
plt.legend()
plt.show()
# ......
# ......
# Standard Scaler to transform data.
sc_X = preprocessing.StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# Data is highly variable due to the seasonality of weather parameters, traffic,
# wind speed, and many other parameters that can be associated with different
# pollutants. KNeighborsRegressor is tested first to capture this variation.

# In[31]:

# import KNeighbors Regressor from sklearn
from sklearn.neighbors import KNeighborsRegressor as KNR

# initiate the KNR regressor with 3 neighbors as an arbitrary starting point
regressor = KNR(n_neighbors=3, metric='minkowski', p=2)
regressor.fit(X_train, Y_train)

# Predict values based on the KNR model
Y_hat = regressor.predict(X_test)

# Root Mean Squared Error is a common measure of prediction error
from sklearn.metrics import mean_squared_error
from math import sqrt
print('RMSE for n_neighbors = 3 is {}.'.format(sqrt(mean_squared_error(Y_test, Y_hat))))

# In[32]:
def run():
    df = pd.read_csv(e1.get())
    s = int(e2.get())
    if s != 0:
        df.drop(df.columns[s - 1], axis=1, inplace=True)
    s = int(e3.get())
    if s != 0:
        df.drop(df.columns[s - 1], axis=1, inplace=True)
    no_of_rows, no_of_col = df.shape
    s = e4.get()
    if s != '0':
        for column in df:
            df = df[~df[column].isin([s, 'NaN'])]
        df.reset_index(drop=True, inplace=True)
        no_of_rows, no_of_col = df.shape
    for column in df:
        if isinstance(df[column][0], str):  # string column: try numeric cleanup, else label-encode
            try:
                df[column] = df[column].apply(clean_float)
            except ValueError:
                encoder = le()
                encoder.fit(df[column])
                df[column] = encoder.transform(df[column])
    no_of_rows, no_of_col = df.shape
    p = e5.get()
    y = np.array(df[p])
    x = np.array(df.drop(p, axis=1))
    x_train, x_test, y_train, y_test = tts(x, y, test_size=0.3)
    x_train = preprocessing.scale(x_train)
    x_test = preprocessing.scale(x_test)
    c1 = var1.get()
    c2 = var2.get()
    if c1 == 1:
        clf1 = svm.SVR(kernel='rbf')
        clf1.fit(x_train, y_train)
        model_name1 = e6.get()
        with open(model_name1, 'wb') as f:
            pickle.dump(clf1, f)
        result.delete('1.0', END)
        result.insert(END, 'Squared error Accuracy for SVR: '
                      + str(clf1.score(x_test, y_test)) + '\n')
    if c2 == 1:
        clf2 = KNR(n_neighbors=3)
        clf2.fit(x_train, y_train)
        model_name2 = e7.get()
        with open(model_name2, 'wb') as f:
            pickle.dump(clf2, f)
        if c1 != 1:
            result.delete('1.0', END)
        result.insert(END, 'Squared error Accuracy for KNN: '
                      + str(clf2.score(x_test, y_test)))
# df_avg == 3 year rolling average + yr4 stats
# Feature sets tried; only the last assignment takes effect:
# X, y = df_avg[['all_avg']].values, df_avg['all_yr'].values
# X, y = df_avg[['all_prev']].values, df_avg['all_yr'].values
# X, y = df_avg[['all_avg', 'all_prev']].values, df_avg['all_yr'].values
X, y = df_avg[[
    '1D_avg', '2D_avg', '3D_avg', 'all_avg',
    '1D_prev', '2D_prev', '3D_prev', 'all_prev'
]].values, df_avg['all_yr'].values

X_train, X_test, y_train, y_test = tts(X, y)

lin = LR(fit_intercept=False)
lin.fit(X, y)
lin.score(X, y)

knn = KNR(n_neighbors=5)
knn.fit(X_train, y_train)
print knn.score(X_train, y_train)
print knn.score(X_test, y_test)

ns = range(1, 30, 2)
scores = []
for n in ns:
    knn = KNR(n_neighbors=n)
    knn.fit(X_train, y_train)
    scores.append(knn.score(X_train, y_train))

rf = RFR(n_estimators=50)
rf.fit(X_train, y_train)
rf.score(X_train, y_train)
def main():
    # ----------------------------
    # Training data
    # ----------------------------
    # Loading training data
    trainingDataFile = 'Training_set.csv'
    trainingData = pd.read_csv(trainingDataFile)

    # Obtaining unique cases of events
    # (Note: this remains the same for both training and test data)
    myEventSet = []
    for x in trainingData.events:
        if x not in myEventSet:
            myEventSet.append(x)
    print('Unique events are as follows: \n', myEventSet, '\n')

    # Event string value reassignment based on unique event cases in 'myEventSet'
    newEvents = []
    for x in trainingData.events:
        for i in range(len(myEventSet)):
            if x == myEventSet[i]:
                newEvents.append(i)

    # Converting datetime to seconds and saving the day of the week
    day = []
    numDateTrainData = []
    for i in range(len(trainingData.date)):
        date_obj = datetime.strptime(str(trainingData.date[i]), '%Y-%m-%d')
        numDateTrainData.append(date_obj.timestamp())
        day.append(date_obj.weekday())
    # print(trainingData.date)

    dictReqCount = {}
    for i in range(len(trainingData.date)):
        if day[i] not in dictReqCount.keys():
            dictReqCount[day[i]] = []
        dictReqCount[day[i]].append(trainingData.request_count[i])
    # print(dictReqCount)

    dictAvgReqCount = {}
    for key, val in dictReqCount.items():
        dictAvgReqCount[key] = sum(val) / len(val)
    # print(dictAvgReqCount)

    maxValue = max(dictAvgReqCount.values())
    maxKey = [key for key, val in dictAvgReqCount.items() if val == maxValue]
    print('Day #{} of the week has the max mean request count'.format(maxKey[0]))
    minValue = min(dictAvgReqCount.values())
    minKey = [key for key, val in dictAvgReqCount.items() if val == minValue]
    print('Day #{} of the week has the min mean request count'.format(minKey[0]))

    # Assembling feature arrays
    features_trainingData = []
    for i in range(len(numDateTrainData)):
        row = [numDateTrainData[i], day[i], trainingData.calendar_code[i],
               trainingData.site_count[i], trainingData.max_temp[i],
               trainingData.min_temp[i], trainingData.precipitation[i],
               newEvents[i]]
        features_trainingData.append(row)

    # Y = list(trainingData.request_count)
    Y = trainingData.request_count
    X = features_trainingData
    # print('length of Y =', len(Y))
    # print(features_trainingData)

    # Models that work on both continuous and discrete data
    scoring = 'neg_mean_squared_error'
    models = [DTR(), GNB(), RFR(), KNR()]
    '''models = [[DTR(), DTR(max_depth=2), DTR(max_depth=5)],
                 [GNB(), GNB(priors=None)],
                 [RFR(), RFR(), RFR()],
                 [KNR(), KNR(), KNR()]]
    '''
    seed = 7
    kfold = MS.KFold(n_splits=10, shuffle=True, random_state=seed)  # shuffle=True is required when random_state is set
    mErr = []
    for model in models:
        results = MS.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        mErr.append(results.mean())
    # print(mErr)

    best_model_index = 0
    maxAbsErrInd = math.fabs(mErr[0])
    for i in range(1, len(mErr)):
        if math.fabs(mErr[i]) < maxAbsErrInd:
            best_model_index = i
            maxAbsErrInd = math.fabs(mErr[i])
    print('\nModel #%d (i.e. %s) performed best'
          % (best_model_index, str(models[best_model_index]).split('(')[0]))

    # -------------------------------------------------------
    # Test Data
    # -------------------------------------------------------
    # Loading test data
    testDataFile = 'Test_set.csv'
    testData = pd.read_csv(testDataFile)

    # Event string reassignment using myEventSet from the training data
    newEvents = []
    for x in testData.events:
        for i in range(len(myEventSet)):
            if x == myEventSet[i]:
                newEvents.append(i)

    # Converting datetime to seconds and determining days of the week
    day = []
    numDateTestData = []
    for i in range(len(testData.date)):
        date_obj = datetime.strptime(str(testData.date[i]), '%Y-%m-%d')
        numDateTestData.append(date_obj.timestamp())
        day.append(date_obj.weekday())

    # Assembling feature arrays
    features_testData = []
    for i in range(len(numDateTestData)):
        row = [numDateTestData[i], day[i], testData.calendar_code[i],
               testData.site_count[i], testData.max_temp[i],
               testData.min_temp[i], testData.precipitation[i], newEvents[i]]
        features_testData.append(row)

    # Test data features
    X_test = features_testData

    # Test data prediction
    bestModel = models[best_model_index]
    Y_pred = bestModel.fit(X, Y).predict(X_test)
    Y_pred_train = bestModel.fit(X, Y).predict(X)
    print('\nThe predicted values for request count using the test data are as follows:\n', Y_pred)
    output_file = open('predicted_request_count.csv', 'w')
    for i in range(len(Y_pred)):
        output_file.write(str(Y_pred[i]) + '\n')
    output_file.close()

    # Plot the results
    plt.figure(1)
    plt.scatter(numDateTrainData, Y, c="darkorange", label="Training data")
    plt.scatter(numDateTestData, Y_pred, c="cornflowerblue", label="Test data model prediction")
    plt.scatter(numDateTrainData, Y_pred_train, c="red", label="Training data model prediction")
    plt.xlabel("Numerical Date")
    plt.ylabel("Page Count")
    plt.title("Best Model")
    plt.legend()
    plt.show()
print('The R2 score of the Decision Tree Regressor model is: \t\t\t%s' % dtr_r2)

# DECISION TREE CLASSIFIER
dtc = DTC()
dtc.fit(X_train, y_train)
dtc_msq = msq(y_test, dtc.predict(X_test))
dtc_r2 = r2(y_test, dtc.predict(X_test))  # r2_score expects (y_true, y_pred)
print('\nThe mean squared error of the Decision Tree Classifier model is: \t%s' % dtc_msq)
print('The R2 score of the Decision Tree Classifier model is: \t\t\t%s' % dtc_r2)

# K NEAREST NEIGHBORS REGRESSOR
knr = KNR()
knr.fit(X_train, y_train)
knr_msq = msq(y_test, knr.predict(X_test))
knr_r2 = r2(y_test, knr.predict(X_test))
print('\nThe mean squared error of the K Nearest Neighbors Regressor model is: \t%s' % knr_msq)
print('The R2 score of the K Nearest Neighbors Regressor model is: \t\t%s' % knr_r2)

# K NEAREST NEIGHBORS CLASSIFIER
knc = KNC()
knc.fit(X_train, y_train)
knc_msq = msq(y_test, knc.predict(X_test))
knc_r2 = r2(y_test, knc.predict(X_test))
print(
# N17-->N9,N4,N24,N8
# N19-->N6,N1
# N20-->N4,N8
# N21-->N14,N10,N6
# N22-->N13,N4
# N23-->N24
# N35-->N20,N2
data = full[0:33050]
scaler = RobustScaler()
train = scaler.fit_transform(data.loc[data.N23.notnull(), ['N24']])
test = scaler.transform(full.loc[full.N23.isnull(), ['N24']])  # apply the train-fitted scaling to the rows being imputed
ya = data.loc[data.N23.notnull(), ['N23']]
model = KNR()
model.fit(train, ya)
pred = model.predict(test)
full.loc[full.N23.isnull(), ['N23']] = pred

# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
full.loc[full.N13.isnull(), 'N13'] = 3
full.loc[full.N4.isnull(), 'N4'] = full.loc[((full['C1'] == 1) & (full['C2'] == 0)), 'N4'].mean()
data = full[0:33050]
import os

import numpy as np

import matlab
from sklearn.neighbors import KNeighborsRegressor as KNR

today_train_data_dir = r'D:\MasterDL\data_set\traffic_data\2011_yabx_speed\knn\train\today'
tomorrow_train_data_dir = r'D:\MasterDL\data_set\traffic_data\2011_yabx_speed\knn\train\tomorrow'
today_test_data_dir = r'D:\MasterDL\data_set\traffic_data\2011_yabx_speed\knn\test\today'
tomorrow_test_data_dir = r'D:\MasterDL\data_set\traffic_data\2011_yabx_speed\knn\test\tomorrow'
result_dir = r'D:\MasterDL\trans\yabx\knn_result'

today_train_data = matlab.read_matfile_from_dir(today_train_data_dir, 'speed', [252, 35 * 288])
tomorrow_train_data = matlab.read_matfile_from_dir(tomorrow_train_data_dir, 'speed', [252, 35 * 288])
today_test_data = matlab.read_matfile_from_dir(today_test_data_dir, 'speed', [95, 35 * 288])
tomorrow_test_data = matlab.read_matfile_from_dir(tomorrow_test_data_dir, 'speed', [95, 35 * 288])

reality = np.reshape(tomorrow_test_data, [1, 95 * 35 * 288])

KNN = KNR(n_neighbors=1)
# learn the mapping from today's speeds to tomorrow's speeds
KNN.fit(today_train_data, tomorrow_train_data)
predictions = KNN.predict(today_test_data)
matlab.save_matrix(os.path.join(result_dir, 'knn_result.mat'), predictions, 'knn')
predictions = np.reshape(predictions, [1, 95 * 35 * 288])

mse = ((reality - predictions) ** 2).mean()
print('mse = ', mse)

# avoid division by zero in the relative-error calculation
for i in range(0, 95 * 35 * 288):
    if reality[0][i] == 0:
        reality[0][i] = 1

rer = np.mean(abs(reality - predictions) / reality)
mae = np.mean(abs(reality - predictions))
print('mre = ', rer)
print('mae = ', mae)
import matplotlib.pyplot as plt

Features, Labels = read_data("energy.txt")
X_train, X_test, y_train, y_test = train_test_split(Features, Labels,
                                                    test_size=.20, random_state=37)

# Standardization of data
X_train_scaled = prepros.scale(X_train)
X_test_scaled = prepros.scale(X_test)

"""
####################K-Nearest Neighbors####################
"""
# No changes
KNNregressor = KNR().fit(X_train, y_train)
KNN_y_pred = KNNregressor.predict(X_test)
print("Mean squared error for K-Nearest Neighbors: {:.3f}.".format(
    mean_squared_error(KNN_y_pred, y_test)))

# number of neighbors changed to 7
KNNregressor = KNR(n_neighbors=7).fit(X_train, y_train)
KNN_y_pred = KNNregressor.predict(X_test)
print("\tNumber of neighbors changed to 7: {:.3f}.".format(
    mean_squared_error(KNN_y_pred, y_test)))

# weights changed to distance
KNNregressor = KNR(weights='distance').fit(X_train, y_train)
KNN_y_pred = KNNregressor.predict(X_test)
print("\tWeights changed to distance: {:.3f}.".format(
    mean_squared_error(KNN_y_pred, y_test)))
from sklearn.neighbors import KNeighborsRegressor as KNR
from sklearn import datasets as ds
from sklearn.preprocessing import scale
from numpy import linspace
import sklearn.cross_validation as cv

boston = ds.load_boston()
scaled = scale(boston.data)
kf = cv.KFold(len(scaled), n_folds=5, shuffle=True, random_state=42)
n = linspace(start=1, stop=10, num=200)
result = list()
# evaluate each candidate Minkowski power p in the grid
for p in n:
    knr = KNR(n_neighbors=4, weights='distance', metric='minkowski', p=p)
    cs_result = cv.cross_val_score(knr, X=scaled, y=boston.target, cv=kf,
                                   scoring='mean_squared_error')
    result.append(cs_result.mean())
result_max = max(result)
print n[result.index(result_max)]
print result_max
        except ValueError:
            encoder = le()
            encoder.fit(df[column])
            df[column] = encoder.transform(df[column])

p = input("Enter the name of prediction column:")
y = np.array(df[p])
x = np.array(df.drop(p, axis=1))
x_train, x_test, y_train, y_test = tts(x, y, test_size=0.3)
x_train = preprocessing.scale(x_train)
x_test = preprocessing.scale(x_test)
y_train = preprocessing.scale(y_train)
y_test = preprocessing.scale(y_test)
print("Which algo would you like to use")
print("1. SVM")
print("2. KNN")
c = int(input("Enter your choice: "))
if c == 1:
    clf1 = svm.SVR(kernel='poly', degree=7)
    clf1.fit(x_train, y_train)
    print('Squared error Accuracy for SVR: ', clf1.score(x_test, y_test))
else:
    clf = KNR(n_neighbors=10)
    clf.fit(x_train, y_train)
    print('Squared error Accuracy for KNN: ', clf.score(x_test, y_test))
data = data.iloc[:rows, 1:]
Model_Names = [
    "Support Vector Classifier", "Linear SVC", "GaussianNB",
    "Random Forest Classifier", "K Nearest Neighbours",
    "AdaBoost Classifier", "Decision Tree Classifier"
]
print(data.columns.values)
Models = {
    1: SVC(C=1, kernel='rbf', gamma='scale', probability=True),
    2: LinearSVC(),
    3: GNB(),
    4: RFC(n_estimators=100),
    5: KNR(),  # note: KNR is the k-NN *regressor* alias in this collection; the surrounding models are classifiers
    6: ABC(base_estimator=RFC()),
    7: DTC()
}


def RunModel(model, data, columns, Predict):
    X = data[columns]
    Y = data[Predict]
    X_train, X_test, y_train, y_test = train_test_split(X, Y,
                                                        train_size=train,
                                                        test_size=test,
                                                        random_state=42)
    svc_mnist.score(X_test_mnist_nmf, y_test_mnist)))

mlpc_mnist = MLPC(hidden_layer_sizes=(200,), activation='tanh',
                  random_state=37).fit(X_train_mnist_nmf, y_train_mnist)
print("Test set score of MLPC mnist: {:.3f}".format(
    mlpc_mnist.score(X_test_mnist_nmf, y_test_mnist)))
print('mnist\n')

"""
##############################Regression##############################
"""
from sklearn.metrics import mean_squared_error

# k-Neighbors Regressor
from sklearn.neighbors import KNeighborsRegressor as KNR

knr_energy = KNR(weights='distance').fit(X_train_energy_pca, y_train_energy)
y_pred_knr = knr_energy.predict(X_test_energy_pca)
print("Mean squared error for kNN: {:.3f}.".format(
    mean_squared_error(y_pred_knr, y_test_energy)))

# Decision Tree Regressor
from sklearn.tree import DecisionTreeRegressor as DTR

dtr_energy = DTR(max_depth=11, min_samples_split=16, min_samples_leaf=2,
                 random_state=37).fit(X_train_energy_stand, y_train_energy)
y_pred_dtr = dtr_energy.predict(X_test_energy_stand)
print("Mean squared error for DTR: {:.3f}.".format(
    mean_squared_error(y_pred_dtr, y_test_energy)))

# Random Forest Regressor
import numpy as np
from sklearn.neighbors import KNeighborsRegressor as KNR
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

data_train = np.loadtxt('data_train.csv', delimiter=",", dtype="float")
x = data_train[..., 0:20]
ss = MinMaxScaler()
x = ss.fit_transform(x)
y = data_train[..., 20]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=16)

model = KNR(n_neighbors=4)
model.fit(x_train, y_train)

rmse = np.sqrt(mse(y_train, model.predict(x_train)))
r2 = r2_score(y_train, model.predict(x_train))
rmset = np.sqrt(mse(y_test, model.predict(x_test)))
r2t = r2_score(y_test, model.predict(x_test))

print(model.predict(x_test))
print(y_test)
print(rmse)
print(rmset)
print(r2)
print(r2t)
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from ngboost.evaluation import *
from ngboost.ngboost import NGBoost
from ngboost.learners import default_linear_learner, default_tree_learner
from ngboost.distns import Normal
from ngboost.scores import MLE, CRPS
from sklearn.neighbors import KNeighborsRegressor as KNR
from argparse import ArgumentParser

np.random.seed(1)

default_knr_learner = lambda: KNR()


def gen_data(n=50, bound=1, deg=3, beta=1, noise=0.9, intcpt=-1):
    x = np.linspace(-bound, bound, n)[:, np.newaxis]
    h = np.linspace(-bound, bound, n)[:, np.newaxis]
    e = np.random.randn(*x.shape) * (0.1 + 10 * np.abs(x))
    y = 50 * (x ** deg) + h * beta + noise * e + intcpt
    return x, y.squeeze(), np.c_[h, np.ones_like(h)]


if __name__ == "__main__":
    argparser = ArgumentParser()
    argparser.add_argument("--n-estimators", type=int, default=301)
    argparser.add_argument("--lr", type=float, default=0.03)
    argparser.add_argument("--minibatch-frac", type=float, default=0.1)
    argparser.add_argument("--natural", action="store_true")
def cross_validation(X, Y):
    from sklearn.metrics import mean_squared_error as mse
    from sklearn import cross_validation, linear_model
    from math import exp, log
    from sklearn.neighbors import NearestNeighbors as NN
    from sklearn.neighbors import KNeighborsClassifier as KNC
    from sklearn.neighbors import KNeighborsRegressor as KNR

    N = 31
    resultsX = []
    resultsId = []
    resultsRegr = []
    resultsRegr_ey = []
    resultsRegr_log = []
    results1NN, results2NN, results3NN = [], [], []
    resultsY = []
    kf = cross_validation.KFold(N, n_folds=10, random_state=True)  # random_state has no effect without shuffle=True
    # kf = cross_validation.LeaveOneOut(N)  # LeaveOneOut == KFold(n, n_folds=n)
    for train_index, test_index in kf:
        X_train, Y_train = X[train_index], Y[train_index]
        X_test, Y_test = X[test_index], Y[test_index]
        print(X_train, X_test)
        resultsX.append(X_test)
        resultsId.append(Y_test)
        resultsY.append(Y_test)
        X_train_list = [[x] for x in X_train]
        regr = linear_model.LinearRegression().fit(X_train_list, Y_train)
        resultsRegr += [[float(regr.predict(x)) for x in X_test]]
        regr_ey = linear_model.LinearRegression().fit(np.exp(X_train_list), Y_train)
        resultsRegr_ey += [[float(regr_ey.predict(np.exp(x))) for x in X_test]]
        # X_regr_ey = []
        # for x in X_test:
        #     x_regr = float(regr_ey.predict(x))
        #     if x_regr >= 1:
        #         X_regr_ey.append(log(x_regr))
        #     else:
        #         X_regr_ey.append(1)
        # resultsRegr_ey += [X_regr_ey]
        # resultsRegr_ey += [[log(float(regr_ey.predict(x))) for x in X_test]]
        # resultsRegr_ey += [[log(abs(float(regr_ey.predict(x)))) for x in X_test]]
        regr_log = linear_model.LinearRegression().fit(np.log(X_train_list), Y_train)
        resultsRegr_log += [[float(regr_log.predict(np.log(x))) for x in X_test]]
        # Y_train = np.asarray(Y_train, dtype="|S6")
        nb1NN = KNR(n_neighbors=1, algorithm='ball_tree').fit(X_train_list, Y_train)
        results1NN += [[float(nb1NN.predict(x)[0]) for x in X_test]]
        nb2NN = KNR(n_neighbors=2, algorithm='ball_tree').fit(X_train_list, Y_train)
        results2NN += [[float(nb2NN.predict(x)[0]) for x in X_test]]
        nb3NN = KNR(n_neighbors=3, algorithm='ball_tree').fit(X_train_list, Y_train)
        results3NN += [[float(nb3NN.predict(x)[0]) for x in X_test]]
    # return [mse(resultsX, resultsId), mse(resultsX, resultsRegr), mse(resultsX, resultsRegr_ey), mse(resultsX, resultsRegr_log), mse(resultsX, results1NN), mse(resultsX, results2NN), mse(resultsX, results3NN)]
    return [
        mse(resultsX, resultsId),
        mse(resultsY, resultsRegr),
        mse(resultsY, resultsRegr_ey),
        mse(resultsY, resultsRegr_log),
        mse(resultsY, results1NN),
        mse(resultsY, results2NN),
        mse(resultsY, results3NN)
    ]
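# Hypothetical invocation sketch for cross_validation(); the Series below are
# illustrative and sized to match the hardcoded N = 31. Like the function
# itself, this assumes the old sklearn.cross_validation API is available.
import numpy as np
import pandas as pd

X = pd.Series(np.linspace(1.0, 10.0, 31))
Y = pd.Series(np.linspace(2.0, 20.0, 31))
print(cross_validation(X, Y))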