import time

import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Project-local modules (import paths assumed from the identifiers used
# below): data_gen supplies the synthetic dataset, myData the patch loader,
# stop the stopping criteria, and my_svm / my_vlad the custom classifiers.
import data_gen
import myData
import stop
from my_svm import my_svm
from my_vlad import my_vlad


def ruleOfThumb(n=10**5, dim=2, C=1, seed=371986):
    # Generate a synthetic dataset with a fixed covariance structure.
    rng = np.random.RandomState(seed)
    Cov = rng.normal(0, 1, (dim, dim))
    XL, yL = data_gen.dataset_fixed_cov(n, dim, seed, Cov)

    # Re-seed so the shuffle is reproducible, then permute the samples.
    rng = np.random.RandomState(seed)
    permutation = rng.permutation(len(XL))
    XL, yL = XL[permutation], yL[permutation]

    # Standardize features, then split 50%-50% into training and test sets.
    scaler = preprocessing.StandardScaler()
    XL = scaler.fit_transform(XL)
    train_X, test_X, train_y, test_y = train_test_split(
        XL, yL, train_size=0.5, random_state=seed)

    # Full-batch gradient descent.
    svm = my_svm(iter=100)
    start_time = time.time()
    w, obj, step = svm.my_gradient_descent(train_X, train_y,
                                           stop=stop.perfor, C=C)
    print("---Cost %s seconds for GD ---" % (time.time() - start_time))
    error_GD = svm.predict(test_X, test_y, w=w[-1])

    # Stochastic gradient descent with the same iteration budget.
    svm = my_svm(iter=100)
    start_time = time.time()
    w, obj, sp = svm.my_sgd(train_X, train_y, seed=seed, stop=stop.perfor,
                            step=0.11, t0=2, C=C)
    print("---Cost %s seconds for SGD ---" % (time.time() - start_time))
    error_SGD = svm.predict(test_X, test_y, w=w[-1])

    # Report whichever solver achieved the lower test error.
    if error_GD < error_SGD:
        print("Use GD for n = %d, dimension = %d, C = %d, "
              "with error rate = %f" % (n, dim, C, error_GD))
    else:
        print("Use SGD for n = %d, dimension = %d, C = %d, "
              "with error rate = %f" % (n, dim, C, error_SGD))
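# A quick smoke test for ruleOfThumb (an illustrative sketch, not part of
# the original experiment): a smaller n keeps both solvers fast while still
# exercising the GD-vs-SGD timing comparison above.
def demo_rule_of_thumb():
    for C in (1, 10, 100):
        ruleOfThumb(n=10**3, dim=2, C=C)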
def run(centroid, group_num, train_X, train_y, test_X, test_y,
        method='knn', n_nb=2, seed=371986):
    # load_sig_data splits each file into patches of the given size (3 means
    # 3*3 = 9 values per patch) and returns a dictionary with 'data' (one
    # patch), 'target' (the sample the patch belongs to), and 'filename'
    # (the source file).
    vlad = my_vlad(centroid[group_num])

    # VLAD-encode the training and test files into fixed-length vectors.
    bofs = []
    for file in train_X:
        mydata, y = myData.load_sig_data(file, 3)
        bofs.append(vlad.get_vlad(mydata['data']).flatten())
    bofs_test = []
    for file in test_X:
        mydata, y = myData.load_sig_data(file, 3)
        bofs_test.append(vlad.get_vlad(mydata['data']).flatten())

    if method == "knn":
        knn = KNeighborsClassifier(n_neighbors=n_nb)
        knn.fit(bofs, train_y)
        score = knn.score(bofs_test, test_y)
    elif method == "LinearSVM":
        svm = my_svm(iter=100)
        w, obj, sp = svm.my_sgd(bofs, train_y, seed=seed, stop=stop.perfor,
                                step=0.11, t0=2, C=1)
        score = 100 - svm.predict(bofs_test, test_y)
    else:
        raise ValueError("Unknown method: %s" % method)
    return score
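# For reference, a textbook VLAD encoding in plain numpy (a sketch of the
# technique my_vlad is assumed to implement; the real class may normalize
# or weight residuals differently): assign each patch descriptor to its
# nearest centroid and accumulate the residuals per centroid.
def vlad_sketch(descriptors, centroids):
    # descriptors: (n, d) patch features; centroids: (k, d) codebook.
    dists = ((descriptors[:, None, :] - centroids[None, :, :]) ** 2).sum(-1)
    assignments = np.argmin(dists, axis=1)
    encoding = np.zeros_like(centroids, dtype=float)
    for i, c in enumerate(assignments):
        encoding[c] += descriptors[i] - centroids[c]
    # L2-normalize the flattened encoding, as is common in VLAD pipelines.
    flat = encoding.flatten()
    norm = np.linalg.norm(flat)
    return flat / norm if norm > 0 else flat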
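# An illustrative driver for run() (the file lists, labels, and centroid
# table are hypothetical placeholders; the real experiment supplies them
# from its own pipeline). Note the two branches report on different scales:
# knn.score is a 0-1 accuracy, while the SVM branch returns 100 minus the
# percent error, per the definitions above.
def demo_run(centroid, train_X, train_y, test_X, test_y, group_num=0):
    for method in ('knn', 'LinearSVM'):
        score = run(centroid, group_num, train_X, train_y,
                    test_X, test_y, method=method)
        print("%s score: %s" % (method, score))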