def thetaEuler(nx, ntmax, theta):
    '''theta == 0 means forward Euler method,
    theta == 1 means backward Euler method.
    0 <= theta <= 1.
    '''
    dx = 1.0 / nx
    dt = 0.5 * dx**2
    x = []
    for i in range(nx + 1):
        x.append(i * dx)
    u = np.zeros(nx + 1)
    dimension = nx - 1
    A = sparse.createSparse(dimension, theta * dt / dx**2).toarray()
    B = sparse.createSparse(dimension, -(1 - theta) * dt / dx**2).toarray()
    f = np.zeros(len(x))
    for i in range(len(x)):
        f[i] = F(x[i])
    tol = 0.0001
    iterMax = 100
    step = 0
    while True:
        V = []
        for element in u[1:-1]:
            V.append(element)
        u[1:-1] = conjugateGradient.conjugateGradient(
            A, np.dot(B, u[1:-1]) + dt * f[1:-1], tol, iterMax)
        step = step + 1
        residual = 0
        for i in range(len(V)):
            residual = residual + (V[i] - u[1:-1][i])**2
        # print(step)
        # print(residual)
        if step > ntmax:
            break
        if np.sqrt(residual) < 0.0000001:
            break
    return x, u
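# A minimal, self-contained sketch of the same theta-method update using only
# NumPy/SciPy (scipy.sparse.linalg.cg).  It illustrates the linear system
#     (I + theta*dt/dx^2 * L) u_new = (I - (1-theta)*dt/dx^2 * L) u_old + dt*f
# that thetaEuler solves each step, assuming homogeneous Dirichlet boundaries
# and that sparse.createSparse builds the usual tridiagonal operator (the repo's
# version may differ).  It does not use the repo's sparse/conjugateGradient modules.
import numpy as np
from scipy.sparse import diags, identity
from scipy.sparse.linalg import cg as scipy_cg


def theta_step_sketch(u, dx, dt, theta, f):
    m = len(u) - 2                                    # number of interior points
    L = diags([-1, 2, -1], [-1, 0, 1], shape=(m, m))  # 1D second-difference stencil
    A = identity(m) + theta * dt / dx**2 * L          # implicit part
    B = identity(m) - (1 - theta) * dt / dx**2 * L    # explicit part
    rhs = B.dot(u[1:-1]) + dt * f[1:-1]
    u_new = u.copy()
    u_new[1:-1], _ = scipy_cg(A, rhs)                 # CG solve of the SPD system
    return u_new

# Usage sketch: u = theta_step_sketch(u, dx, dt, 0.5, f)  # theta = 0.5 is Crank-Nicolson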
def train_ner(self, X_train, y_train, X_val, y_val):
    self.epoch = 20
    start_time = time.time()
    hist_rfvd = []
    hist_norm = []
    hist_acc = []
    hist_time = []
    for i in range(self.epoch):
        # if i % 10 == 0:
        I = (1 - y_train * X_train.dot(self.w)) > 0
        X_I = X_train[I, :]
        y_I = y_train[I]
        # w   : d x 1 dim
        # dw  : d x 1 dim
        # X_I : I x d dim
        # y_I : I x 1 dim
        dw = self.w + 2 * self.lam / X_train.shape[0] * X_I.T.dot(
            X_I.dot(self.w) - y_I)
        # H * d = -dw, so d = -H^(-1) * dw
        d, _ = cg.conjugateGradient(X_train, I, dw, self.lam)
        self.w = self.w + d
        hist_time.append(time.time() - start_time)
        hinge = 1 - y_train * X_train.dot(self.w)
        loss = 1 / 2 * np.dot(self.w.T, self.w) + self.lam / X_train.shape[0] * np.dot(
            np.maximum(hinge.T, 0), np.maximum(hinge, 0))
        rfvd = (loss - self.f_w_star) / self.f_w_star
        acc = self.calAccuracy(X_val, y_val)
        norm = np.linalg.norm(dw, 2)
        hist_rfvd.append(rfvd)
        hist_norm.append(norm)
        hist_acc.append(acc)
        print(i, "th iter")
        print("hist_relative_f", hist_rfvd[i])
        print("hist_norm", hist_norm[i])
        print("hist_acc", hist_acc[i])
    self.history['ner_rfvd'] = hist_rfvd
    self.history['ner_gnorm'] = hist_norm
    self.history['ner_acc'] = hist_acc
    self.history['ner_time'] = hist_time
    return self
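# A hedged sketch of the Newton step taken above, assuming the objective is the
# L2-regularized squared-hinge SVM
#     f(w) = 0.5*w'w + (lam/n) * sum_i max(0, 1 - y_i x_i'w)^2.
# Its generalized Hessian only needs Hessian-vector products
#     H v = v + (2*lam/n) * X_I' (X_I v),   with I the active set,
# so a plain CG loop can solve H d = -grad without forming H explicitly.  This is
# an illustration of what cg.conjugateGradient presumably does internally, not the
# repo's actual implementation.
import numpy as np


def hessian_vec(v, X_I, lam, n):
    # Matrix-free product with H = I + (2*lam/n) * X_I' X_I
    return v + 2.0 * lam / n * X_I.T.dot(X_I.dot(v))


def newton_cg_direction(grad, X_I, lam, n, tol=1e-6, max_iter=100):
    d = np.zeros_like(grad)
    r = -grad - hessian_vec(d, X_I, lam, n)   # residual of H d = -grad (d = 0 start)
    p = r.copy()
    rs_old = r.dot(r)
    for _ in range(max_iter):
        Hp = hessian_vec(p, X_I, lam, n)
        alpha = rs_old / p.dot(Hp)
        d += alpha * p
        r -= alpha * Hp
        rs_new = r.dot(r)
        if np.sqrt(rs_new) < tol:
            break
        p = r + (rs_new / rs_old) * p
        rs_old = rs_new
    return d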
def trainNEW(self, x, y, x_test, y_test):
    print("START NM")
    start = time.time()
    n = x.get_shape()[0]
    lossHistory, normHistory, accHistory, timeHistory = [], [], [], []
    for i in range(self.newiter):
        print(i, "/", self.newiter)
        xW = x.dot(self.W)
        yxW = y * xW
        I = np.nonzero((np.ones_like(yxW) - yxW) > 0)[0]
        XI = x.toarray()
        XI = XI[I, :]
        XI = csr_matrix(XI)
        loss_seg = XI.dot(self.W) - y[I]
        dW = self.W + 2 * self.lam / n * XI.transpose().dot(loss_seg)
        d, _ = cg.conjugateGradient(x, I, dW, self.lam)
        self.W = self.W + d
        timeHistory.append(time.time() - start)
        xx = 1 - y * x.dot(self.W)
        loss = 0.5 * np.dot(self.W.T, self.W) + self.lam / n * np.dot(
            np.maximum(xx, 0).T, np.maximum(xx, 0))
        rf = (loss - self.W_star) / self.W_star
        norm = np.linalg.norm(dW, 2)
        acc = self.calAccuracy(x_test, y_test)
        normHistory.append(norm)
        lossHistory.append(rf)
        accHistory.append(acc)
    print("NM_accuracy:", accHistory[-1])
    print("NM_total time:", timeHistory[-1])
    print("FINISH NM")
    return lossHistory, normHistory, accHistory, timeHistory
# Generate a random b vector for the system to solve
# b = np.random.rand(n, 1)
b = np.ones((n, 1))

# --------------------------------------------------------------------------
# Solve using each method
# --------------------------------------------------------------------------
# TEST: solve using the explicit inverse
if PRINT_ENABLE:
    x = np.linalg.inv(A) @ b
    print("Inverse, x=\t\t\t\t" + "{}".format(x[0:3].T))

# --------------------------------------------------------------------------
# Solve using CG (w/ matrix multiplies)
start = time.time()
x, iters[idx, 0] = cg.conjugateGradient(A, b, tol, maxIters)
end = time.time()
times[idx, 0] = end - start
if PRINT_ENABLE:
    print("CG, x=\t\t\t\t\t" + "{}".format(x[0:3].T))

# Solve using CG (w/ FFTs, w/o matrix multiplies - efficient)
start = time.time()
x, iters[idx, 1] = cg.cg_Toep_FFTmin(A, b, tol, maxIters)
end = time.time()
times[idx, 1] = end - start
if PRINT_ENABLE:
    print("CG (fftmin), x=\t\t\t" + "{}".format(x[0:3].T))

# --------------------------------------------------------------------------
# Solve using PCG w/ Strang (w/ matrix multiplies)
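# A hedged sketch of the trick the FFT-based CG variant presumably relies on:
# a Toeplitz matrix-vector product can be done in O(n log n) by embedding the
# Toeplitz matrix into a 2n x 2n circulant and multiplying with FFTs.  This is an
# illustration only, not the repo's cg_Toep_FFTmin implementation.
import numpy as np


def toeplitz_matvec_fft(first_col, first_row, v):
    n = len(v)
    # First column of the circulant embedding: [col, 0, reversed row tail]
    c = np.concatenate([first_col, [0.0], first_row[-1:0:-1]])
    v_pad = np.concatenate([v, np.zeros(n)])
    y = np.fft.ifft(np.fft.fft(c) * np.fft.fft(v_pad))
    return np.real(y[:n])

# Quick check against a dense Toeplitz multiply:
# from scipy.linalg import toeplitz
# T = toeplitz(first_col, first_row)
# np.allclose(T @ v, toeplitz_matvec_fft(first_col, first_row, v))  # -> True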
def hessian(x, Q):
    val = numpy.matrix([
        [math.exp(x[0, 0] / 10.0), 0.0],
        [0.0, 4.0 * math.exp(2.0 * x[1, 0] / 10.0)]
    ])
    return Q + ((1.0 / 100.0) * val)


if __name__ == '__main__':
    Q = numpy.matrix([
        [2, 1],
        [1, 2]
    ])
    b = numpy.matrix([
        [10],
        [10]
    ])
    objectiveLambda = lambda x: objective(x, Q, b)
    gradientLambda = lambda x: gradient(x, Q, b)
    hessianLambda = lambda x: hessian(x, Q)
    guess = numpy.matrix([[10.0], [10.0]])
    # guess = numpy.matrix([[1000000.0], [1000000.0]])
    res = conjugateGradient.conjugateGradient(
        guess, objectiveLambda, gradientLambda, hessianLambda)
    x, objectiveValue, g, xHistory, objectiveHistory = res
    print(xHistory)
    print(objectiveHistory)
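# A small finite-difference sanity check for the analytic hessian() above.  The
# objective() and gradient() used by the driver are defined elsewhere; for the
# check we assume (hypothetically) an objective of a form consistent with that
# Hessian, f(x) = 0.5*x'Qx - b'x + exp(x0/10) + exp(2*x1/10), whose Hessian is
# Q + (1/100)*diag(exp(x0/10), 4*exp(2*x1/10)).
import math
import numpy


def assumed_objective(x, Q, b):
    quad = 0.5 * float(x.T * Q * x) - float(b.T * x)
    return quad + math.exp(x[0, 0] / 10.0) + math.exp(2.0 * x[1, 0] / 10.0)


def fd_hessian(f, x, h=1e-4):
    # Central second-difference approximation of the Hessian of f at x
    n = x.shape[0]
    H = numpy.zeros((n, n))
    for i in range(n):
        for j in range(n):
            e_i = numpy.zeros((n, 1)); e_i[i, 0] = h
            e_j = numpy.zeros((n, 1)); e_j[j, 0] = h
            H[i, j] = (f(x + e_i + e_j) - f(x + e_i) - f(x + e_j) + f(x)) / h**2
    return H

# x0 = numpy.matrix([[1.0], [2.0]])
# numpy.allclose(fd_hessian(lambda z: assumed_objective(z, Q, b), x0),
#                hessian(x0, Q), atol=1e-3)   # -> True under the assumed objective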
def main():
    # read the train file from first argument
    train_file = sys.argv[1]
    # train_file = '../data/covtype.scale.trn.libsvm'
    # read the test file from second argument
    test_file = sys.argv[2]
    # test_file = '../data/covtype.scale.tst.libsvm'

    # You can use load_svmlight_file to load data from train_file and test_file
    X_train, y_train = load_svmlight_file(train_file)
    X_test, y_test = load_svmlight_file(test_file)

    # You can use cg.ConjugateGradient(X, I, grad, lambda_)
    # Main entry point to the program
    X_train = sparse.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_test = sparse.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X = sparse.csr_matrix(X_train)
    X_test = sparse.csr_matrix(X_test)
    y = sparse.csr_matrix(y_train).transpose()
    y_test = sparse.csr_matrix(y_test).transpose()

    # set global hyperparameters
    if sys.argv[1] == "covtype.scale.trn.libsvm":
        lambda_ = 3631.3203125
        optimal_loss = 2541.664519
        five_fold_CV = 75.6661
        optimal_function_value = 2541.664519
    else:
        lambda_ = 7230.875
        optimal_loss = 669.664812
        five_fold_CV = 97.3655
        optimal_function_value = 669.664812

    # SGD
    # set local SGD hyperparameters
    print('starting SGD...')
    n_batch = 1000
    beta = 0
    lr = 0.001
    w = np.zeros((X_train.shape[1]))
    n = X_train.shape[0]
    sgd_grad = []
    sgd_time = []
    sgd_rel = []
    sgd_test_acc = []
    epoch = 180
    start = time.time()
    # decay the learning rate each epoch
    for i in range(epoch):
        gamma_t = lr / (1 + beta * i)
        batch_ = np.random.permutation(n)  # shuffle
        for j in range(n // n_batch):
            # make batch
            idx = batch_[j * n_batch:(j + 1) * n_batch]
            X_bc = X[idx]
            y_bc = y[idx]
            grad = get_grad(w, lambda_, n, X_bc, y_bc, n_batch)  # compute gradient
            w = w - gamma_t * grad  # update weights
        t = time.time() - start
        sgd_time.append(t)  # append to time list
        grad_ = np.linalg.norm(grad)  # get gradient norm
        sgd_grad.append(grad_)
        rel = (get_loss(w, lambda_, X_test, y_test, n_batch) -
               optimal_loss) / optimal_loss  # get relative function value
        sgd_rel.append(rel)
        test_acc = get_acc(w, lambda_, X_test, y_test, n_batch)  # get test accuracy
        sgd_test_acc.append(test_acc)
    print("SGD : final_time: {}, final_test_acc: {}".format(
        time.time() - start, sgd_test_acc[-1]))

    # plot SGD
    '''
    plt.plot(sgd_time, sgd_grad)
    plt.xlabel("time")
    plt.ylabel("grad")
    plt.title("SGD")
    plt.show()

    plt.plot(sgd_time, sgd_rel)
    plt.xlabel("time")
    plt.ylabel("relative function")
    plt.title("SGD")
    plt.show()

    plt.plot(sgd_time, sgd_test_acc)
    plt.xlabel("time")
    plt.ylabel("test_acc")
    plt.title("SGD")
    plt.show()
    '''

    print('starting Newton...')
    # Newton
    # set local Newton hyperparameters
    epoch = 50
    n_batch = 1000
    beta = 0.0001
    lr = 0.001
    w = np.zeros((X_train.shape[1]))
    n = X_train.shape[0]
    nt_grad = []
    nt_time = []
    nt_rel = []
    newton_time = time.time()
    nt_test_acc = []
    w = np.zeros((X_train.shape[1]))
    n = X_train.shape[0]
    for i in range(epoch):
        gamma_t = lr / (1 + beta * i)
        hessian_total = np.zeros(w.shape)
        I_ = []  # init I list to compute conjugate gradient
        for j in range(n // n_batch):
            X_bc = X[j * n_batch:(j + 1) * n_batch]  # make X batch
            y_bc = y[j * n_batch:(j + 1) * n_batch]  # make y batch
            hessian, I = get_hessian(w, lambda_, n, X_bc, y_bc)  # get hessian
            hessian_total += hessian
            I_.append(I)
        I_ = np.concatenate(I_)
        hessian_total += w
        delta, _ = cg.conjugateGradient(
            X, I_, hessian_total, lambda_)  # get update value from conjugateGradient
        w = w + delta  # update w
        t = time.time() - newton_time
        nt_time.append(t)  # append to time list
        grad_ = np.linalg.norm(hessian_total)  # get gradient norm
        nt_grad.append(grad_)
        rel = (get_loss(w, lambda_, X_test, y_test, n_batch) -
               optimal_loss) / optimal_loss  # get relative function value
        nt_rel.append(rel)
        test_acc = get_acc(w, lambda_, X_test, y_test, n_batch)  # get test accuracy
        nt_test_acc.append(test_acc)
    final_time = time.time() - newton_time
    print("final_time: {}, final_test_acc: {}".format(final_time, nt_test_acc[-1]))

    # plot
    '''
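# Hypothetical sketch of the mini-batch gradient helper used above (the actual
# get_grad is defined elsewhere in this file), shown only to make the SGD loop
# concrete.  It assumes the L2-regularized squared-hinge objective
#     f(w) = 0.5*w'w + (lambda_/n) * sum_i max(0, 1 - y_i x_i'w)^2,
# estimated on a mini-batch and rescaled by n/n_batch.
def get_grad_sketch(w, lambda_, n, X_bc, y_bc, n_batch):
    y_arr = y_bc.toarray().ravel()           # y_bc arrives as an (n_batch, 1) sparse column
    margins = 1 - y_arr * X_bc.dot(w)
    active = margins > 0                      # rows violating the margin
    X_a = X_bc[active]
    y_a = y_arr[active]
    batch_grad = 2 * lambda_ / n * X_a.T.dot(X_a.dot(w) - y_a)
    return w + batch_grad * (n / n_batch)     # rescale the batch sum to the full data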
def main():
    # read the train file from first argument
    train_file = sys.argv[1]
    # read the test file from second argument
    test_file = sys.argv[2]

    # Check input arguments
    if (os.path.basename(train_file) == 'covtype.scale.trn.libsvm'
            and os.path.basename(test_file) == 'covtype.scale.tst.libsvm'):
        DATA = 'covtype'
    elif (os.path.basename(train_file) == 'realsim.scale.trn.libsvm'
            and os.path.basename(test_file) == 'realsim.scale.tst.libsvm'):
        DATA = 'realsim'
    else:
        raise ValueError('Invalid Data')

    # You can use load_svmlight_file to load data from train_file and test_file
    # X_train, y_train = load_svmlight_file(train_file)
    X_train, y_train = load_svmlight_file(train_file)
    X_test, y_test = load_svmlight_file(test_file)

    # You can use cg.ConjugateGradient(X, I, grad, lambda_)
    lambda_val = LAMBDA_DICT[DATA]
    true_val = LOSS_VAL_DICT[DATA]
    print('========= Data: {} ========='.format(DATA))

    # Pegasos (mini-batch SGD)
    print('========= Pegasos solver =========')
    # Set parameters
    epoch = 2000
    batch_size = 1000
    lr = 0.001
    beta = lr / 100
    N = X_train.shape[0]
    w = np.zeros((X_train.shape[1]))
    A = np.array(X_train.dot(w))
    I = np.array(range(len(A)))
    I = I[1 - y_train * A > 0]
    Pegasos_grad = []
    Pegasos_loss = []
    Pegasos_rel = []
    Pegasos_acc = []
    Pegasos_time = []
    start_time = time.perf_counter()
    for i in range(epoch + 1):
        gamma_t = lr / (1 + beta * i)
        # mini_batches = create_mini_batches(X_train, y_train, batch_size)
        grad_norm = []
        total_loss = []
        # Iterate mini-batch SGD (1 batch per epoch)
        # for batch in mini_batches:
        idx = np.array(range(X_train.shape[0]))
        np.random.shuffle(idx)
        batch_idx = idx[0:batch_size]
        X = X_train[batch_idx]
        y = y_train[batch_idx]
        grad = get_gradient(X, y, w, N, batch_size, lambda_val)
        w = w - gamma_t * grad
        grad_norm.append(np.linalg.norm(grad))
        total_loss.append(get_loss(X, y, w, N))

        # Calculate target values
        func_val = 1 / 2 * w.dot(w) + lambda_val * np.sum(total_loss) * (X_train.shape[0] / batch_size)
        relative_val = (func_val - true_val) / true_val
        grad_total = np.sum(grad_norm) * (X_train.shape[0] / batch_size)
        test_acc = get_accuracy(X_test, y_test, w, N)
        Pegasos_grad.append(grad_total)
        Pegasos_loss.append(np.sum(total_loss))
        Pegasos_rel.append(relative_val)
        Pegasos_acc.append(test_acc)
        Pegasos_time.append(time.perf_counter() - start_time)
        if i % 200 == 0 or i == epoch:
            print('Epoch: {} with Loss: {:.4f}'.format(i, np.sum(total_loss)))
            print('    Grad: {:.4f} | Rel: {:.4f} | Acc: {:.4f} '.format(grad_total, relative_val, test_acc))

    # Save output figures
    plt.figure()
    plt.plot(Pegasos_time, Pegasos_grad)
    plt.xlabel('time')
    plt.ylabel('grad')
    plt.title('{}_Pegasos: grad over time'.format(DATA))
    plt.savefig('out/{}_Pegasos_grad.png'.format(DATA))

    # plt.figure()
    # plt.plot(Pegasos_time, Pegasos_loss)
    # plt.xlabel('time')
    # plt.ylabel('loss')
    # plt.title('{}_Pegasos: loss over time'.format(DATA))
    # plt.savefig('out/{}_Pegasos_loss.png'.format(DATA))

    plt.figure()
    plt.plot(Pegasos_time, Pegasos_rel)
    plt.xlabel('time')
    plt.ylabel('relative value')
    plt.title('{}_Pegasos: relative value over time'.format(DATA))
    plt.savefig('out/{}_Pegasos_relative.png'.format(DATA))

    plt.figure()
    plt.plot(Pegasos_time, Pegasos_acc)
    plt.xlabel('time')
    plt.ylabel('accuracy')
    plt.title('{}_Pegasos: accuracy over time'.format(DATA))
    plt.savefig('out/{}_Pegasos_accuracy.png'.format(DATA))

    # SGD
    print('========== SGD solver ==========')
    # Set parameters
    epoch = 200
    batch_size = 1000
    lr = 0.001
    beta = lr / 100
    N = X_train.shape[0]
    w = np.zeros((X_train.shape[1]))
    A = np.array(X_train.dot(w))
    I = np.array(range(len(A)))
    I = I[1 - y_train * A > 0]
    SGD_grad = []
    SGD_loss = []
    SGD_rel = []
    SGD_acc = []
    SGD_time = []
    start_time = time.perf_counter()
    for i in range(epoch + 1):
        gamma_t = lr / (1 + beta * i)
        mini_batches = create_mini_batches(X_train, y_train, 1000)
        grad_norm = []
        total_loss = []
        # Iterate mini-batch SGD (all batches in 1 epoch)
        for batch in mini_batches:
            X, y = batch
            grad = get_gradient(X, y, w, N, batch_size, lambda_val)
            w = w - gamma_t * grad
            grad_norm.append(np.linalg.norm(grad))
            total_loss.append(get_loss(X, y, w, N))

        # Calculate target values
        func_val = 1 / 2 * w.dot(w) + lambda_val * np.sum(total_loss)
        relative_val = (func_val - true_val) / true_val
        grad_total = np.sum(grad_norm)
        test_acc = get_accuracy(X_test, y_test, w, N)
        SGD_grad.append(grad_total)
        SGD_loss.append(np.sum(total_loss))
        SGD_rel.append(relative_val)
        SGD_acc.append(test_acc)
        SGD_time.append(time.perf_counter() - start_time)
        if i % 20 == 0 or i == epoch:
            print('Epoch: {} with Loss: {:.4f}'.format(i, np.sum(total_loss)))
            print('    Grad: {:.4f} | Rel: {:.4f} | Acc: {:.4f} '.format(grad_total, relative_val, test_acc))

    # Save output figures
    plt.figure()
    plt.plot(SGD_time, SGD_grad)
    plt.xlabel('time')
    plt.ylabel('grad')
    plt.title('{}_SGD: grad over time'.format(DATA))
    plt.savefig('out/{}_SGD_grad.png'.format(DATA))

    # plt.figure()
    # plt.plot(SGD_time, SGD_loss)
    # plt.xlabel('time')
    # plt.ylabel('loss')
    # plt.title('{}_SGD: loss over time'.format(DATA))
    # plt.savefig('out/{}_SGD_loss.png'.format(DATA))

    plt.figure()
    plt.plot(SGD_time, SGD_rel)
    plt.xlabel('time')
    plt.ylabel('relative value')
    plt.title('{}_SGD: relative value over time'.format(DATA))
    plt.savefig('out/{}_SGD_relative.png'.format(DATA))

    plt.figure()
    plt.plot(SGD_time, SGD_acc)
    plt.xlabel('time')
    plt.ylabel('accuracy')
    plt.title('{}_SGD: accuracy over time'.format(DATA))
    plt.savefig('out/{}_SGD_accuracy.png'.format(DATA))

    # Newton
    print('========== Newton solver ==========')
    # Set parameters
    epoch = 50
    batch_size = 1  # Ignore mini-batch
    lr = 0.001
    # beta = lr/100
    N = X_train.shape[0]
    w = np.zeros((X_train.shape[1]))
    A = np.array(X_train.dot(w))
    I = np.array(range(len(A)))
    I = I[1 - y_train * A > 0]
    Newton_grad = []
    Newton_loss = []
    Newton_rel = []
    Newton_acc = []
    Newton_time = []
    start_time = time.perf_counter()
    for i in range(epoch + 1):
        # gamma_t = lr / (1 + beta * i)
        grad_norm = []
        total_loss = []
        # Conduct Newton & conjugate gradient
        X, y = X_train, y_train
        grad = get_gradient(X, y, w, N, batch_size, lambda_val)
        d, _ = cg.conjugateGradient(X, I, grad, lambda_val)
        w = w + d
        grad_norm.append(np.linalg.norm(grad))
        total_loss.append(get_loss(X, y, w, N))

        # Calculate target values
        func_val = 1 / 2 * w.dot(w) + lambda_val * np.sum(total_loss)
        relative_val = (func_val - true_val) / true_val
        grad_total = np.mean(grad_norm)
        test_acc = get_accuracy(X_test, y_test, w, N)
        Newton_grad.append(grad_total)
        Newton_loss.append(np.sum(total_loss))
        Newton_rel.append(relative_val)
        Newton_acc.append(test_acc)
        Newton_time.append(time.perf_counter() - start_time)
        if i % 5 == 0 or i == epoch:
            print('Epoch: {} with Loss: {:.4f}'.format(i, np.sum(total_loss)))
            print('    Grad: {:.4f} | Rel: {:.4f} | Acc: {:.4f} '.format(grad_total, relative_val, test_acc))

    plt.figure()
    plt.plot(Newton_time, Newton_grad)
    plt.xlabel('time')
    plt.ylabel('grad')
    plt.title('{}_Newton: grad over time'.format(DATA))
    plt.savefig('out/{}_Newton_grad.png'.format(DATA))

    # plt.figure()
    # plt.plot(Newton_time, Newton_loss)
    # plt.xlabel('time')
    # plt.ylabel('loss')
    # plt.title('{}_Newton: loss over time'.format(DATA))
    # plt.savefig('out/{}_Newton_loss.png'.format(DATA))

    plt.figure()
    plt.plot(Newton_time, Newton_rel)
    plt.xlabel('time')
    plt.ylabel('relative value')
    plt.title('{}_Newton: relative value over time'.format(DATA))
    plt.savefig('out/{}_Newton_relative.png'.format(DATA))

    plt.figure()
    plt.plot(Newton_time, Newton_acc)
    plt.xlabel('time')
    plt.ylabel('accuracy')
    plt.title('{}_Newton: accuracy over time'.format(DATA))
    plt.savefig('out/{}_Newton_accuracy.png'.format(DATA))

    # Save all methods in one figure
    plt.figure()
    plt.plot(np.log(Pegasos_time), Pegasos_grad)
    plt.plot(np.log(SGD_time), SGD_grad)
    plt.plot(np.log(Newton_time), Newton_grad)
    plt.xlabel('log(time)')
    plt.ylabel('grad')
    plt.legend(['Pegasos', 'SGD', 'Newton'])
    plt.title('{}_Result: grad over time'.format(DATA))
    plt.savefig('out/{}_Result_grad.png'.format(DATA))

    # plt.figure()
    # plt.plot(np.log(Pegasos_time), Pegasos_loss)
    # plt.plot(np.log(SGD_time), SGD_loss)
    # plt.plot(np.log(Newton_time), Newton_loss)
    # plt.xlabel('log(time)')
    # plt.ylabel('loss')
    # plt.legend(['Pegasos', 'SGD', 'Newton'])
    # plt.title('{}_Result: loss over time'.format(DATA))
    # plt.savefig('out/{}_Result_loss.png'.format(DATA))

    plt.figure()
    plt.plot(np.log(Pegasos_time), Pegasos_rel)
    plt.plot(np.log(SGD_time), SGD_rel)
    plt.plot(np.log(Newton_time), Newton_rel)
    plt.xlabel('log(time)')
    plt.ylabel('relative value')
    plt.legend(['Pegasos', 'SGD', 'Newton'])
    plt.title('{}_Result: relative value over time'.format(DATA))
    plt.savefig('out/{}_Result_relative.png'.format(DATA))

    plt.figure()
    plt.plot(np.log(Pegasos_time), Pegasos_acc)
    plt.plot(np.log(SGD_time), SGD_acc)
    plt.plot(np.log(Newton_time), Newton_acc)
    plt.xlabel('log(time)')
    plt.ylabel('accuracy')
    plt.legend(['Pegasos', 'SGD', 'Newton'])
    plt.title('{}_Result: accuracy over time'.format(DATA))
    plt.savefig('out/{}_Result_accuracy.png'.format(DATA))
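# Hypothetical sketch of the create_mini_batches helper used in the SGD section
# above (the real one is defined elsewhere in this file): shuffle the rows once,
# then return (X_batch, y_batch) pairs of size batch_size.
import numpy as np


def create_mini_batches_sketch(X, y, batch_size):
    idx = np.random.permutation(X.shape[0])
    batches = []
    for start in range(0, X.shape[0], batch_size):
        batch_idx = idx[start:start + batch_size]
        batches.append((X[batch_idx], y[batch_idx]))
    return batches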
# plt.legend()

# Gradient descent with a regularization (penalty) term
plt.subplot(233)
# plt.ylim(-1.5, 1.5)
plt.plot(x_origin, y_origin, c="b", label="$\sin(2\pi x)$")
plt.scatter(x_train, y_train, edgecolor="g", facecolor="none",
            s=50, label="training data")
ggd_theta = g_gradD.g_gradientDescent(
    x_poly, y_train, 0.1, theta, 1e-7, p_lambda)
y_test = x_test @ ggd_theta
plt.plot(x_origin, y_test, "r", label="g_gradientDescent")
print("With dataset size %d, polynomial degree %d, and penalty coefficient "
      "lambda %f, the regularized gradient descent coefficients are:"
      % (sample_size, j, p_lambda))
print(ggd_theta)
plt.legend()

# Conjugate gradient method
plt.subplot(234)
# plt.ylim(-1.5, 1.5)
plt.plot(x_origin, y_origin, c="b", label="$\sin(2\pi x)$")
plt.scatter(x_train, y_train, edgecolor="g", facecolor="none",
            s=50, label="training data")
cg_theta = cg.conjugateGradient(x_poly, y_train, p_lambda, 1e-7)
y_test = x_test @ cg_theta
plt.plot(x_origin, y_test, "r", label="conjugateGradient")
print("With dataset size %d, polynomial degree %d, and penalty coefficient "
      "lambda %f, the conjugate gradient coefficients are:"
      % (sample_size, j, p_lambda))
print(cg_theta)
plt.legend()

plt.show()
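# A hedged sketch of what cg.conjugateGradient(x_poly, y_train, p_lambda, 1e-7)
# presumably solves for this ridge polynomial fit: the regularized normal
# equations (X'X + lambda*I) theta = X'y, which are symmetric positive definite
# and therefore a natural fit for CG.  Illustration only, not the repo's code.
import numpy as np


def ridge_cg_sketch(X, y, lam, tol=1e-7, max_iter=1000):
    A = X.T @ X + lam * np.eye(X.shape[1])   # SPD normal-equations matrix
    b = X.T @ y
    theta = np.zeros(X.shape[1])
    r = b - A @ theta
    p = r.copy()
    rs_old = r @ r
    for _ in range(max_iter):
        Ap = A @ p
        alpha = rs_old / (p @ Ap)
        theta += alpha * p
        r -= alpha * Ap
        rs_new = r @ r
        if np.sqrt(rs_new) < tol:
            break
        p = r + (rs_new / rs_old) * p
        rs_old = rs_new
    return theta

# Usage sketch: theta = ridge_cg_sketch(x_poly, y_train, p_lambda)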