def CoF_compute_search_pow_flex(P_con, H_a, is_dual_hop, rate_sec_hop=None,
                                mod_scheme='sym_mod', quan_scheme='sym_quan',
                                beta=None):
    """Search the transmit powers that maximize ``CoF_compute_fixed_pow_flex``.

    The optimizer is selected by the module-level string ``P_Search_Alg``.
    Every strategy minimizes the negated return value of
    ``CoF_compute_fixed_pow_flex`` over ``L = H_a.ncols()`` power variables.

    :param P_con: upper limit of each power variable in the bounded searches.
    :param H_a: matrix object providing ``nrows()`` (M) and ``ncols()`` (L).
    :param is_dual_hop: forwarded unchanged to ``CoF_compute_fixed_pow_flex``.
    :param rate_sec_hop: forwarded unchanged; ``None`` is treated as ``[]``.
    :param mod_scheme: forwarded unchanged.
    :param quan_scheme: forwarded unchanged.
    :param beta: scaling vector forwarded to the rate function; ``None`` or
        ``[]`` selects an all-ones vector of length L.
    :returns: the best (un-negated) objective value found.
    :raises Exception: if ``P_Search_Alg`` is not a supported name; optimizer
        errors are re-raised after a diagnostic message is printed.
    """
    (M, L) = (H_a.nrows(), H_a.ncols())
    # None replaces the former mutable ``[]`` defaults; ``[]`` is still accepted.
    if rate_sec_hop is None:
        rate_sec_hop = []
    if beta is None or (isinstance(beta, list) and not beta):
        beta = vector(RR, [1] * L)

    def rate_at(powers, scaling):
        # Single place that knows the argument order of the rate function.
        return CoF_compute_fixed_pow_flex(
            powers, P_con, False, H_a, is_dual_hop, rate_sec_hop,
            mod_scheme, quan_scheme, scaling)

    # SciPy minimizes, hence the minus sign in both objectives.
    cof_pow = lambda x: -rate_at(x, beta)
    # Joint search: the first L entries are powers, the next M entries are beta.
    # NOTE(review): the default beta has length L while this one has length M;
    # the two only agree for a square H_a -- confirm the intended length.
    cof_pow_beta = lambda x: -rate_at(x[0:L], vector(RR, x[L:L + M]))

    Pranges = ((0, P_con), ) * L
    initial_guess = [0.5 * P_con] * L

    try:
        if P_Search_Alg == 'brute':
            res_cof = optimize.brute(cof_pow, Pranges, Ns=brute_number,
                                     full_output=True, finish=None)
            sum_rate_opt = -res_cof[1]
        elif P_Search_Alg == 'TNC':
            res_cof = optimize.fmin_tnc(cof_pow, initial_guess,
                                        bounds=list(Pranges), approx_grad=True,
                                        epsilon=1, stepmx=10)
            sum_rate_opt = rate_at(res_cof[0], beta)
        elif P_Search_Alg == 'anneal':
            # NOTE(review): optimize.anneal is absent from recent SciPy
            # releases; this branch needs an old SciPy.
            # Bounds follow L instead of being hard-coded for two users.
            res_cof = optimize.anneal(cof_pow, initial_guess,
                                      schedule='cauchy', T0=1, Tf=1e-6,
                                      full_output=True, maxiter=30,
                                      lower=[1] * L, upper=[P_con] * L,
                                      dwell=30, disp=True)
            sum_rate_opt = -res_cof[1]
        elif P_Search_Alg == 'brute_fmin':
            res_brute = optimize.brute(cof_pow, Pranges, Ns=brute_fmin_number,
                                       full_output=True, finish=None)
            res_fmin = optimize.fmin(cof_pow, res_brute[0], xtol=1, ftol=0.01,
                                     maxiter=brute_fmin_maxiter,
                                     full_output=True)
            sum_rate_opt = -res_fmin[1]
        elif P_Search_Alg == 'brute_brute':
            res_brute1 = optimize.brute(cof_pow, Pranges,
                                        Ns=brute_brute_first_number,
                                        full_output=True, finish=None)
            # Second pass: refine inside one coarse grid step around the best point.
            Pranges_brute_2 = tuple([
                (max(0, P_i - P_con / brute_brute_first_number),
                 min(P_con, P_i + P_con / brute_brute_first_number))
                for P_i in res_brute1[0]
            ])
            res_brute2 = optimize.brute(cof_pow, Pranges_brute_2,
                                        Ns=brute_brute_second_number,
                                        full_output=True, finish=None)
            sum_rate_opt = -res_brute2[1]
        elif P_Search_Alg == 'brute_fmin_beta':
            res_brute = optimize.brute(cof_pow, Pranges, Ns=brute_fmin_number,
                                       full_output=True, finish=None)
            res_fmin_beta = optimize.fmin(cof_pow_beta,
                                          list(res_brute[0]) + [1] * M,
                                          xtol=0.01, ftol=0.01,
                                          maxiter=brute_fmin_maxiter * 50,
                                          full_output=True)
            sum_rate_opt = -res_fmin_beta[1]
        elif P_Search_Alg == 'brute_fmin_cobyla':
            res_brute = optimize.brute(cof_pow, Pranges, Ns=brute_fmin_number,
                                       full_output=True, finish=None)

            def pow_constraint(x):
                # COBYLA keeps every returned component non-negative.
                # NOTE(review): the upper limit P_con is not enforced here.
                return x

            p_cobyla = optimize.fmin_cobyla(cof_pow, res_brute[0],
                                            pow_constraint, maxfun=100)
            sum_rate_opt = rate_at(p_cobyla, beta)
        elif P_Search_Alg == 'brute_fmin_cobyla_beta':
            res_brute = optimize.brute(cof_pow, Pranges, Ns=brute_fmin_number,
                                       full_output=True, finish=None)

            def pow_beta_constraint(x):
                return x

            p_cobyla = optimize.fmin_cobyla(cof_pow_beta,
                                            list(res_brute[0]) + [1] * M,
                                            pow_beta_constraint, maxfun=200)
            # The result holds L powers followed by M beta entries; split it
            # the same way cof_pow_beta does instead of passing it whole.
            sum_rate_opt = rate_at(p_cobyla[0:L],
                                   vector(RR, p_cobyla[L:L + M]))
        elif P_Search_Alg == "differential_evolution":
            res_de = optimize.differential_evolution(cof_pow, Pranges)
            sum_rate_opt = -res_de.fun
        elif P_Search_Alg == "genetic":
            res_cof = GeneticAlgorithm(P_con, H_a)
            sum_rate_opt = res_cof[1]
        else:
            raise Exception('error: algorithm not supported')
    except Exception:
        # Single-argument call form prints identically on Python 2 and 3.
        print('error in search algorithms')
        raise

    return sum_rate_opt
# Regularized cost function (lambda)
def coste2(O, X, Y, lam):
    """Return ``coste(O, X, Y)`` plus an L2 penalty on the non-bias parameters.

    ``m`` is read from module scope. The first entry of ``O`` (the intercept)
    is excluded from the penalty so that this cost matches ``gradiente2``.
    """
    return coste(O, X, Y) + (lam / (2 * m)) * (O[1:] ** 2).sum()


# Regularized gradient function (lambda)
def gradiente2(O, X, Y, lam):
    """Return the gradient of ``coste2`` with respect to ``O``."""
    # Copy of O with the intercept zeroed: the intercept is not regularized.
    # The original built this vector but then used O itself in the penalty.
    AuxO = np.hstack([np.zeros([1]), O[1:,]])
    return ((X.T.dot(sigmoide(X.dot(O)) - Y)) / m) + (lam / m) * AuxO


X = XArr.copy()
X = np.insert(X, 0, 1, axis=1)  # prepend the column of ones (intercept)

start = time.time()
thetas = np.ones(len(X[0]))
result = opt.fmin_tnc(func=coste2, x0=thetas, fprime=gradiente2,
                      args=(X, YArr, 0.1))
thetas_opt = result[0]
end = time.time()

print("EXE TIME:", end - start, "seconds")
print("OPT THETAS:\n", thetas_opt)


# Evaluate the predictions obtained with the optimal thetas
def evalua(thetas, X, y):
    """Return the percentage of rows of ``X`` whose thresholded prediction equals ``y``."""
    thetasMat = np.matrix(thetas)
    z = np.dot(thetasMat, X.transpose())
    resultados = sigmoide(z)
    # Threshold the probabilities at 0.5 to get hard 0/1 labels.
    resultados[resultados >= 0.5] = 1
    resultados[resultados < 0.5] = 0
    # Count matching entries directly instead of via the length of np.where().
    admitidosPred = np.count_nonzero(resultados == y)
    return (admitidosPred / len(y)) * 100
def gradient(theta, RegParam, X, Y):
    """Return the regularized gradient used as ``fprime`` / ``jac`` below.

    The unregularized gradient is ``X.T . (sigmoid(X . theta) - Y) / m``,
    transposed; every entry of its first row except the first one then gets
    ``RegParam * theta[j] / m`` added.
    """
    # NOTE(review): the sample count is taken as the larger dimension of X.
    m = max(X.shape)
    residual = sigmoid(np.dot(X, theta.T)) - Y
    grad = (np.dot(X.T, residual) / m).T
    # An open-ended slice ``1:`` runs through the last element; ``1:-1``
    # would stop one short because Python intervals are half-open.
    grad[0, 1:] = grad[0, 1:] + RegParam * theta[1:] / m
    return grad


# ====================== Parameters ======================
power_order = 6
RegParam = 0.9
X, n, m = mapFeature(x, power_order)
initial_theta = np.zeros(n)

# ============ Parameters that minimize the cost function ============
# Same problem solved twice: legacy fmin_tnc interface, then minimize(TNC).
result = opt.fmin_tnc(func=costFun,
                      x0=initial_theta,
                      fprime=gradient,
                      args=(RegParam, X, y))
thetaRes = result[0]

fmin = minimize(fun=costFun,
                x0=initial_theta,
                args=(RegParam, X, y),
                method='TNC',
                jac=gradient)
theta = fmin.x

minCost1 = costFun(theta, RegParam, X, y)
minCost = costFun(thetaRes, RegParam, X, y)
def fit(self, x, y, theta):
    """Fit the weights with ``fmin_tnc`` and store them in ``self.w_``.

    ``self.cost_function`` and ``self.gradient`` are called by the optimizer
    as ``f(weights, x, y.flatten())``; ``theta`` is the starting point.
    Returns ``self``.
    """
    flat_targets = y.flatten()
    solution, _n_evals, _status = fmin_tnc(
        func=self.cost_function,
        x0=theta,
        fprime=self.gradient,
        args=(x, flat_targets),
    )
    self.w_ = solution
    return self
def minimize_constrained(func, cons, x0, gradient=None, algorithm='default', **args):
    r"""
    Minimize a function with constraints.

    INPUT:

    - ``func`` -- Either a symbolic function, or a Python function whose
      argument is a tuple with n components

    - ``cons`` -- constraints. This should be either a function or list of
      functions that must be positive. Alternatively, the constraints can
      be specified as a list of intervals that define the region we are
      minimizing in. If the constraints are specified as functions, the
      functions should be functions of a tuple with `n` components
      (assuming `n` variables). If the constraints are specified as a list
      of intervals and there are no constraints for a given variable, that
      component can be (``None``, ``None``).

    - ``x0`` -- Initial point for finding minimum

    - ``algorithm`` -- Optional, specify the algorithm to use:

      - ``'default'``  -- default choices

      - ``'l-bfgs-b'`` -- only effective if you specify bound constraints.
        See [ZBN97]_.

    - ``gradient`` -- Optional gradient function. This will be computed
      automatically for symbolic functions. This is only used when the
      constraints are specified as a list of intervals.

    OUTPUT:

    The minimizer as a vector over ``RDF``. A ``ValueError`` is raised when
    ``cons`` is neither a list of intervals, a list of functions, nor a
    single function.

    EXAMPLES:

    Let us maximize `x + y - 50` subject to the following constraints:
    `50x + 24y \leq 2400`, `30x + 33y \leq 2100`, `x \geq 45`,
    and `y \geq 5`::

        sage: y = var('y')
        sage: f = lambda p: -p[0]-p[1]+50
        sage: c_1 = lambda p: p[0]-45
        sage: c_2 = lambda p: p[1]-5
        sage: c_3 = lambda p: -50*p[0]-24*p[1]+2400
        sage: c_4 = lambda p: -30*p[0]-33*p[1]+2100
        sage: a = minimize_constrained(f,[c_1,c_2,c_3,c_4],[2,3])
        sage: a
        (45.0, 6.25)

    Let's find a minimum of `\sin(xy)`::

        sage: x,y = var('x y')
        sage: f = sin(x*y)
        sage: minimize_constrained(f, [(None,None),(4,10)],[5,5])
        (4.8..., 4.8...)

    Check, if L-BFGS-B finds the same minimum::

        sage: minimize_constrained(f, [(None,None),(4,10)],[5,5], algorithm='l-bfgs-b')
        (4.7..., 4.9...)

    Rosenbrock function, [http://en.wikipedia.org/wiki/Rosenbrock_function]::

        sage: from scipy.optimize import rosen, rosen_der
        sage: minimize_constrained(rosen, [(-50,-10),(5,10)],[1,1],gradient=rosen_der,algorithm='l-bfgs-b')
        (-10.0, 10.0)
        sage: minimize_constrained(rosen, [(-50,-10),(5,10)],[1,1],algorithm='l-bfgs-b')
        (-10.0, 10.0)

    REFERENCES:

    .. [ZBN97] C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778:
      L-BFGS-B, FORTRAN routines for large scale bound constrained
      optimization. ACM Transactions on Mathematical Software, Vol 23, Num. 4,
      pp.550--560, 1997.
    """
    from sage.symbolic.expression import Expression
    import numpy
    from scipy import optimize
    function_type = type(lambda x, y: x + y)

    if isinstance(func, Expression):
        # A real list: the names are unpacked once per compiled function, so
        # a one-shot iterator (Python 3 ``map``) would be exhausted.
        var_names = [str(v) for v in func.variables()]
        fast_f = func._fast_float_(*var_names)
        f = lambda p: fast_f(*p)
        fast_gradient_functions = [g._fast_float_(*var_names)
                                   for g in func.gradient()]
        # numpy.array replaces the scipy.array alias dropped by newer SciPy.
        gradient = lambda p: numpy.array(
            [a(*p) for a in fast_gradient_functions])
    else:
        f = func

    solution = None  # avoids shadowing the builtin ``min``
    if isinstance(cons, list) and cons:
        first = cons[0]
        if first is None or isinstance(first, (tuple, list)):
            # Interval (bound) constraints.
            if gradient is not None:
                if algorithm == 'l-bfgs-b':
                    solution = optimize.fmin_l_bfgs_b(f, x0, gradient, bounds=cons, iprint=-1, **args)[0]
                else:
                    solution = optimize.fmin_tnc(f, x0, gradient, bounds=cons, messages=0, **args)[0]
            else:
                if algorithm == 'l-bfgs-b':
                    solution = optimize.fmin_l_bfgs_b(f, x0, approx_grad=True, bounds=cons, iprint=-1, **args)[0]
                else:
                    solution = optimize.fmin_tnc(f, x0, approx_grad=True, bounds=cons, messages=0, **args)[0]
        elif isinstance(first, function_type):
            solution = optimize.fmin_cobyla(f, x0, cons, iprint=0, **args)
    elif isinstance(cons, function_type):
        solution = optimize.fmin_cobyla(f, x0, cons, iprint=0, **args)

    if solution is None:
        # Previously this fell through to an unbound local variable.
        raise ValueError("cons must be a function, a non-empty list of "
                         "functions, or a non-empty list of intervals")
    return vector(RDF, solution)
from __future__ import division

import scipy.optimize as op
import pandas as pd
import numpy as np


def CostFunc(theta, X, y):
    """Return the logistic-regression cost and its gradient as ``(J, grad)``.

    ``X`` is the (m, n) design matrix, ``theta`` the n parameters and ``y``
    the m labels. Returning both values lets ``fmin_tnc`` be called without a
    separate ``fprime``.
    """
    m = X.shape[0]
    Sigmoid = 1 / (1 + np.exp(-(X.dot(theta.T))))
    L1 = np.log(Sigmoid)
    L2 = np.log(1 - Sigmoid)
    J2 = (1 / m) * np.sum(-y.T.dot(L1) - ((1 - y).T.dot(L2)))
    grad = (Sigmoid - y).dot(X) * (1 / m)
    return J2, grad


if __name__ == "__main__":
    # Passing the path lets loadtxt open and close the file itself; the
    # original handed over an open() handle that was never closed.
    data = np.loadtxt("ex2data1.txt", delimiter=",")
    X = data[:, 0:2]
    y = data[:, 2]
    m, n = X.shape
    X = np.hstack((np.ones((m, 1)), X))  # intercept column
    theta = np.zeros(n + 1)
    theta1, nfeval, rc = op.fmin_tnc(func=CostFunc, x0=theta, args=(X, y),
                                     messages=0)
    print(theta1)
# Label the cost-history figure.
plt.xlabel('N of iterations')
plt.ylabel('Cost Function')
plt.title('Cost function evolution')
# Figure 3: evolution of the weights stored in Witer.
plt.figure(3)
plt.plot(Witer)
plt.xlabel('iteration')
plt.ylabel('Weights')
plt.grid('on')
plt.title('Weight evolution')
plt.show()
Xnew = X.T
#Wnew=np.array([-25,0.222222222,0.222222222])
#why does setting this completely change accuracy?????
# NOTE(review): Wnew is only assigned in the commented-out line above within
# this view; it must be defined earlier in the file -- confirm.
# Minimize Logistic_Cost with the truncated Newton solver, using Gradient
# as the analytic gradient.
W_optimization = opt.fmin_tnc(func=Logistic_Cost,
                              x0=Wnew,
                              fprime=Gradient,
                              args=(X, Y))
min_cost = Logistic_Cost(W_optimization[0], X, Y)
# Reshape the solution into a 1x3 row so it can left-multiply X.
W_opt = np.reshape(W_optimization[0], (1, 3))


def Predict_Admission(X, W_opt):
    # Threshold sigmoid(W_opt . X) at 0.5 into a 0/1 vector, one per column.
    # NOTE(review): no return statement is visible here; the function may
    # continue past the end of this excerpt.
    probability = sigmoid(np.dot(W_opt, X))
    size = np.size(probability)
    Admission_result = np.zeros(size)
    print(probability)
    for l in range(size):
        if probability[0, l] > 0.5:
            Admission_result[l] = 1
        else:
            Admission_result[l] = 0
# Plain gradient descent; the learning rate has to be chosen by hand.
theta0 = npy.zeros((n + 1, 1))  # start from theta = 0
outloop = 10000  # maximum number of iterations
alfa = 0.009  # learning rate
cost_list = npy.zeros((outloop // 100, 2))  # (iteration, cost) every 100 steps
for i in range(outloop):
    cost, grad = costFunction(X_1, Y, theta0)
    theta0 = theta0 - alfa * grad
    if i % 100 == 0:
        cost_list[i // 100, 0] = i
        cost_list[i // 100, 1] = cost
print(theta0)

# Same problem solved with SciPy's truncated Newton (TNC) optimizer.
theta = npy.zeros((n + 1, 1))  # start from theta = 0
result = op.fmin_tnc(func=costFun, x0=theta, fprime=gradFun, args=(X_1, Y))
theta = result[0]
print(theta)

# Decision boundary theta0 + theta1*x1 + theta2*x2 = 0, drawn over the range
# of the first feature. The original mixed the minimum of column 1 with the
# maximum of column 2 for the two end points.
plot_x = npy.asarray([[X_1[:, 1].min() - 2], [X_1[:, 1].max() + 2]])
plot_y = npy.asarray((-1 / theta[2]) * (theta[1] * plot_x + theta[0]))
plt.plot(plot_x, plot_y, '-')
plt.show()

# Predicted probability for one example (leading 1 is the intercept term).
testScore = [1, 65, 85]
testScore = npy.asarray(testScore)
prob = sigmoid(npy.dot(testScore, theta))
print(
    "For a student with scores 65 and 85 ,we predict an admission probability of %f"
    % prob)
def gradient_function(theta, x, y, m, lambda_reg):
    """Return the regularized gradient as an array with one column.

    Row 0 (the intercept) carries no regularization term; every other row
    gets ``(lambda_reg / m) * theta[j]`` added.
    """
    predictions = sigmoid(x.dot(theta)).reshape(-1, 1)
    targets = y.reshape(m, 1)
    grad = x.T.dot(predictions - targets) / m
    theta_col = theta.reshape((theta.shape[0], 1))
    grad[1:] += (lambda_reg / m) * theta_col[1:]
    return grad


print("Initial cost = " +
      str(cost_function(theta, x_poly, y, size, lambda_reg)))

result = opt.fmin_tnc(
    func=cost_function,
    x0=theta,
    fprime=gradient_function,
    args=(x_poly, y, size, lambda_reg),
)
theta_opt = result[0]

# Evaluation grid for the decision surface.
lin1 = np.linspace(-0.75, 1.00, 50)
lin2 = np.linspace(-0.75, 1.00, 50)
z = np.zeros((len(lin1), len(lin2)))


def plotting_preprocessing(lin1, lin2, theta_opt):
    """Fill the module-level grid ``z`` with model responses and return it."""
    for row, first in enumerate(lin1):
        for col, second in enumerate(lin2):
            features = polynomial.fit_transform(np.column_stack((first, second)))
            z[row, col] = np.dot(features, theta_opt)
    return z
def CrossTrack(self,x,y,wx,wy):
    """Track a window from frame ``n - 1`` to frame ``n`` by cross-correlation.

    ``(x, y)`` is the window's corner and ``wx``/``wy`` its width and height,
    used as ``[v:v+wy, u:u+wx]`` slices. ``n`` is ``self.current``. Returns
    ``np.array([x + 0.5*wx + ox, (self.oh - (y + wy*0.5)) + oy])`` where
    ``(ox, oy)`` is the offset derived from the correlation peak.

    Side effects: updates ``self.track_image`` and ``self.ro``.
    """
    u = int(np.round(x))
    v = int(np.round(y))
    #print 'initial guess: {0},{1}'.format(x + 0.5*wx, (self.oh - (y + wy*0.5)))
    n = self.current
    #print n, n - 1
    # First call: use frame n - 1 as the reference frame.
    if self.track_image is None:
        self.track_image = [n - 1, self.getFrame(n - 1, mode = 'whateber')]
    # Reuse self.image when it already holds frame n, else fetch the frame.
    if not (self.image is None):
        if self.image[0] == n:
            frame_next = copy.copy(self.image[1])
        else:
            frame_next = self.getFrame(n, mode = 'whateber')
    else:
        frame_next = self.getFrame(n, mode = 'whateber')
    # set up the ROI for tracking
    ro = self.track_image[1][v:v+wy, u:u+wx]
    #self.ro = copy.copy(ro)
    # Shift the search window by the stored offset self.v, when there is one.
    if self.v is not None:
        #print 'adjusting...'
        u = int(np.round(x + self.v[0]))
        v = int(np.round(y - self.v[1]))
        y -= self.v[1]
        x += self.v[0]
    roi = frame_next[v:v+wy, u:u+wx]
    self.ro = copy.copy(roi)
    #cv2.imwrite('prev_{0}.png'.format(n-1), ro)
    #cv2.imwrite('next_{0}.png'.format(n), roi)
    # Remove the mean of each patch before correlating.
    ro = ro - np.mean(ro)
    roi = roi - np.mean(roi)
    # Correlate each of the three channels separately and add the results.
    b1, g1, r1 = cv2.split(ro)
    b2, g2, r2 = cv2.split(roi)
    corr_b = correlate2d(b1, b2, boundary = 'symm', mode = 'same')
    corr_g = correlate2d(g1, g2, boundary = 'symm', mode = 'same')
    corr_r = correlate2d(r1, r2, boundary = 'symm', mode = 'same')
    corr = corr_b + corr_g + corr_r
    oy, ox = np.unravel_index(np.argmax(corr), corr.shape)
    # mark discrete estimate with red square
    self.ro[oy, :] = np.array([0.,0.,255.])
    self.ro[:, ox] = np.array([0.,0.,255.])
    # get continuous estimate
    # The spline interpolates -corr so that minimizing it finds the peak.
    s = RectBivariateSpline(range(corr.shape[0]), range(corr.shape[1]), -corr)
    # NOTE(review): the spline grid ends at shape - 1 while the bounds allow
    # values up to shape -- confirm whether that is intended.
    sol, nfeval, rc = fmin_tnc(lambda x: s(x[0], x[1]), np.array([float(oy), float(ox)]), approx_grad = True, bounds = [(0., float(corr.shape[0])), (0., float(corr.shape[1]))], disp = 0)
    oy, ox = sol
    # Offsets are measured relative to (shape / 2 - 1).
    # NOTE(review): `/` is integer division only under Python 2 -- confirm.
    oy = oy - (ro.shape[0]/2 - 1)
    ox = -(ox - (ro.shape[1]/2 - 1))
    #print 'offset: {0},{1}'.format(ox, oy)
    self.track_image = [n, frame_next]
    return np.array([x + 0.5*wx + ox, (self.oh - (y + wy*0.5)) + oy])
m, n = X.shape

# Prepend the intercept column of ones.
X = np.insert(X, 0, 1, axis=1)
test_theta = np.array([-24, 0.2, 0.2])

# Fit the decision boundary twice from the same start: first BFGS, then TNC.
# The TNC result overwrites theta and is the one used below.
theta, cost, *res = opt.fmin_bfgs(
    costFunction,
    x0=test_theta,
    fprime=gradFunction,
    args=(X, y),
    maxiter=400,
    full_output=True,
)
theta, *res = opt.fmin_tnc(
    costFunction,
    x0=test_theta,
    fprime=gradFunction,
    args=(X, y),
)

# Visualize the decision boundary.
plotDecisionBoundary(theta, X, y)

# Evaluate the model: one probability, then accuracy on X itself.
prob = sigmoid(np.array([1, 45, 85] @ theta))
p = predict(theta, X)
#from sklearn.metrics import accuracy_score
accuracy = np.mean(p == y) * 100
#accuracy = accuracy_score(y, p)
# Column 0 holds the label; the remaining columns are the features.
X = data.iloc[:, 1:cols]
y = data.iloc[:, 0:1]

# Convert from data frames to numpy arrays.
X = np.array(X.values)
y = np.array(y.values)
theta = np.zeros((1, cols - 1))
print(X.shape, theta.shape, y.shape)

lambdas = 1
print(costReg(theta, X, y, lambdas))
# costs = cost(theta, X, y)
# print('cost = ', costs)

# Use the optimizer from the scipy library.
result = opt.fmin_tnc(func=costReg, x0=theta, fprime=gradientReg,
                      args=(X, y, lambdas))
# print(cost(result[0], X, y))
# print(result)

# Predict on X and measure the classification accuracy.
theta_min = np.matrix(result[0])
predictions = predict(theta_min, X)
correct = [1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0
           for (a, b) in zip(predictions, y)]
# Percentage of correct predictions. The original used ``%`` (modulo), which
# yields the raw hit count, or 0 when every prediction is right.
accuracy = round(100.0 * sum(correct) / len(correct), 2)
print('accuracy = {0}%'.format(accuracy))
def gradient(theta, x, y):
    """Return the logistic-regression gradient ``x.T @ (sigmoid(x @ theta) - y) / m``."""
    m = x.shape[0]
    return ((1 / m) * x.T @ (sigmoid(x @ theta) - y))


data = pd.read_csv('ex2data1.txt', header=None)
x = data.iloc[:, 0:2]
y = data.iloc[:, 2]
#mask = y == 1
#adm = plt.scatter(x[mask][0].values, x[mask][1].values)
#not_adm = plt.scatter(x[~mask][0].values, x[~mask][1].values)
m, n = x.shape
x = np.hstack((np.ones((m, 1)), x))  # intercept column
# Convert to a NumPy array before adding the axis: indexing a pandas Series
# with ``[:, np.newaxis]`` is no longer supported by current pandas.
y = y.values[:, np.newaxis]
theta = np.zeros((n + 1, 1))

j = costFunc(theta, x, y)
print(j)

# fmin_tnc works on flat vectors, hence the flatten() calls.
temp = opt.fmin_tnc(func=costFunc,
                    x0=theta.flatten(),
                    fprime=gradient,
                    args=(x, y.flatten()))
theta_optimized = temp[0]
print(theta_optimized)

j = costFunc(theta_optimized[:, np.newaxis], x, y)
print(j)
# --- end of feature mapping ---

# --- prepare data and parameters ---
cols = data.shape[1]
X = np.array(data.iloc[:, 1:cols].values)  # every column after the first
y = np.array(data.iloc[:, 0:1].values)  # first column, kept two-dimensional
theta = np.zeros(11)
# NOTE(review): despite its name this is passed as the last argument of the
# *_with_reg functions -- presumably the regularization strength; confirm.
learningRate = 1
# --- end of data and parameter preparation ---

print(J_with_reg(theta, X, y, learningRate))  # cost at the initial theta
print(gradient_with_reg(theta, X, y, learningRate))  # gradient at the initial theta

# Training: let the optimizer search for the best theta.
result = opt.fmin_tnc(
    func=J_with_reg,
    x0=theta,
    fprime=gradient_with_reg,
    args=(X, y, learningRate),
)
opt_theta = result[0]  # optimal theta
print('opt_theta:{}'.format(opt_theta))
accuracy(opt_theta, X, y)  # accuracy reached with the optimal theta

# --- scikit-learn logistic regression on the same data ---
model = linear_model.LogisticRegression()
model.fit(X, y.ravel())
print('accuracy of sklearn:{}'.format(model.score(X, y)))
# --- end of scikit-learn section ---
# In[5]:


def gradient(theta, X, Y):
    """Return ``(h(theta, X) - Y) . X`` divided by the number of labels."""
    residual = h(theta, X) - Y
    return np.dot(residual, X) / Y.shape[0]


# ## 5. Optimization

# In[16]:

import scipy.optimize as opt

result = opt.fmin_tnc(
    func=cost_function,
    x0=theta,
    fprime=gradient,
    args=(X, Y),
)
# Wrap the solution in an outer list so it becomes a single-row 2-D array.
optimal_theta = np.array([result[0]])
# One pre-formatted string prints the same text under Python 2 and 3.
print("Cost using optimal theta: %s" % (cost_function(optimal_theta, X, Y),))

# ## 6. Prediction

# In[15]:


def predict(theta, input):
    """Return the first element of ``h(theta, input)``."""
    hypothesis = h(theta, input)
    return hypothesis[0]


# predict probability with which a student with Exam1 score of 45, and Exam2 score of 85 will be admitted
print(predict(optimal_theta, np.array([1, 45, 85])))
return grad''' #function to calc gradientDescent by function Matrics def gradientDescent(theta, X, y): thetav = np.matrix(theta) Xv = np.matrix(X) yv = np.matrix(y) return (X.T * (sigmoid(Xv * thetav.T) - yv)) / len(X) #to find the miniumim theta using scipy.optimize by gradent Desecnt # هنا بيغنيك عن الفاااااااااااااااااااا و عدد اللفات طرح كل ثيتا من اللي قبلها في كل لفه import scipy.optimize as opt result = opt.fmin_tnc(func=costFunction, x0=theta, fprime=gradientDescent, args=(X, y)) CostAfterOptimize = costFunction(result[0], X, y) print() print('cost after optimize = ', CostAfterOptimize) print() # to predict the value and checkk def predict(theta, X, y): return [1 if x >= 0.5 else 0 for x in sigmoid(X * np.matrix(theta).T)] prediction = predict(result[0], X, y)
Y = np.matrix(Y) parameters = int(theta.ravel().shape[1]) grad = np.zeros(parameters) error = sigmoid(X * theta.T) - Y for i in range(parameters): term = np.multiply(error, X[:, i]) grad[i] = np.sum(term) / len(X) return grad # 用SciPy's truncated newton(TNC)实现寻找最优参数 result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(X, Y)) print(result) print(cost(result[0], X, Y)) theta = result[0] # 画出决策边界 data_visual(data, names, theta) # 计算预测效果 def predict(theta, X): probability = sigmoid(X * theta.T) return [1 if x >= 0.5 else 0 for x in probability] theta_min = np.matrix(result[0])
for j in range(0, i): data['F' + str(i) + str(j)] = np.power(x1, i - j) * np.power(x2, j) data.drop('Test 1', axis=1, inplace=True) data.drop('Test 2', axis=1, inplace=True) print data.head() # set X and y (remember from above that we moved the label to column 0) cols = data.shape[1] X2 = data.iloc[:, 1:cols] y2 = data.iloc[:, 0:1] # convert to numpy arrays and initalize the parameter array theta X2 = np.array(X2.values) y2 = np.array(y2.values) theta2 = np.zeros(11) learningRate = 1 print "origin cost", costReg(theta2, X2, y2, learningRate) result2 = opt.fmin_tnc(func=costReg, x0=theta2, fprime=gradientReg, args=(X2, y2, learningRate)) theta_min = np.matrix(result2[0]) predictions = predict(theta_min, X2) correct = [ 1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0 for (a, b) in zip(predictions, y2) ] accuracy = (sum(map(int, correct)) % len(correct)) print 'accuracy = {0}%'.format(accuracy)
def minimize_constrained(func, cons, x0, gradient=None, algorithm='default', **args):
    r"""
    Minimize a function with constraints.

    INPUT:

    - ``func`` -- Either a symbolic function, or a Python function whose
      argument is a tuple with n components

    - ``cons`` -- constraints. This should be either a function or list of
      functions that must be positive. Alternatively, the constraints can
      be specified as a list of intervals that define the region we are
      minimizing in. If the constraints are specified as functions, the
      functions should be functions of a tuple with `n` components
      (assuming `n` variables). If the constraints are specified as a list
      of intervals and there are no constraints for a given variable, that
      component can be (``None``, ``None``).

    - ``x0`` -- Initial point for finding minimum

    - ``algorithm`` -- Optional, specify the algorithm to use:

      - ``'default'``  -- default choices

      - ``'l-bfgs-b'`` -- only effective if you specify bound constraints.
        See [ZBN1997]_.

    - ``gradient`` -- Optional gradient function. This will be computed
      automatically for symbolic functions. This is only used when the
      constraints are specified as a list of intervals.

    OUTPUT:

    The minimizer as a vector over ``RDF``. A ``ValueError`` is raised when
    ``cons`` is not one of the supported kinds of constraints.

    EXAMPLES:

    Let us maximize `x + y - 50` subject to the following constraints:
    `50x + 24y \leq 2400`, `30x + 33y \leq 2100`, `x \geq 45`,
    and `y \geq 5`::

        sage: y = var('y')
        sage: f = lambda p: -p[0]-p[1]+50
        sage: c_1 = lambda p: p[0]-45
        sage: c_2 = lambda p: p[1]-5
        sage: c_3 = lambda p: -50*p[0]-24*p[1]+2400
        sage: c_4 = lambda p: -30*p[0]-33*p[1]+2100
        sage: a = minimize_constrained(f,[c_1,c_2,c_3,c_4],[2,3])
        sage: a
        (45.0, 6.25...)

    Let's find a minimum of `\sin(xy)`::

        sage: x,y = var('x y')
        sage: f = sin(x*y)
        sage: minimize_constrained(f, [(None,None),(4,10)],[5,5])
        (4.8..., 4.8...)

    Check if L-BFGS-B finds the same minimum::

        sage: minimize_constrained(f, [(None,None),(4,10)],[5,5], algorithm='l-bfgs-b')
        (4.7..., 4.9...)

    Rosenbrock function (see the :wikipedia:`Rosenbrock_function`)::

        sage: from scipy.optimize import rosen, rosen_der
        sage: minimize_constrained(rosen, [(-50,-10),(5,10)],[1,1],gradient=rosen_der,algorithm='l-bfgs-b')
        (-10.0, 10.0)
        sage: minimize_constrained(rosen, [(-50,-10),(5,10)],[1,1],algorithm='l-bfgs-b')
        (-10.0, 10.0)

    TESTS:

    Check if :trac:`6592` is fixed::

        sage: x, y = var('x y')
        sage: f = (100 - x) + (1000 - y)
        sage: c = x + y - 479 # > 0
        sage: minimize_constrained(f, [c], [100, 300])
        (805.985..., 1005.985...)
        sage: minimize_constrained(f, c, [100, 300])
        (805.985..., 1005.985...)
    """
    from sage.symbolic.expression import Expression
    import numpy
    from scipy import optimize
    function_type = type(lambda x, y: x + y)

    if isinstance(func, Expression):
        var_list = func.variables()
        var_names = [str(_) for _ in var_list]
        fast_f = func._fast_float_(*var_names)
        f = lambda p: fast_f(*p)
        gradient_list = func.gradient()
        fast_gradient_functions = [
            gi._fast_float_(*var_names) for gi in gradient_list
        ]
        # numpy.array replaces the scipy.array alias dropped by newer SciPy.
        gradient = lambda p: numpy.array(
            [a(*p) for a in fast_gradient_functions])
        # Symbolic constraints are compiled over the variables of ``func``.
        if isinstance(cons, Expression):
            fast_cons = cons._fast_float_(*var_names)
            cons = lambda p: numpy.array([fast_cons(*p)])
        elif isinstance(cons, list) and cons and isinstance(cons[0], Expression):
            fast_cons = [ci._fast_float_(*var_names) for ci in cons]
            cons = lambda p: numpy.array([a(*p) for a in fast_cons])
    else:
        f = func

    solution = None  # avoids shadowing the builtin ``min``
    if isinstance(cons, list) and cons:
        first = cons[0]
        if first is None or isinstance(first, (tuple, list)):
            # Interval (bound) constraints.
            if gradient is not None:
                if algorithm == 'l-bfgs-b':
                    solution = optimize.fmin_l_bfgs_b(f, x0, gradient, bounds=cons, iprint=-1, **args)[0]
                else:
                    solution = optimize.fmin_tnc(f, x0, gradient, bounds=cons, messages=0, **args)[0]
            else:
                if algorithm == 'l-bfgs-b':
                    solution = optimize.fmin_l_bfgs_b(f, x0, approx_grad=True, bounds=cons, iprint=-1, **args)[0]
                else:
                    solution = optimize.fmin_tnc(f, x0, approx_grad=True, bounds=cons, messages=0, **args)[0]
        elif isinstance(first, (function_type, Expression)):
            solution = optimize.fmin_cobyla(f, x0, cons, iprint=0, **args)
    elif isinstance(cons, (function_type, Expression)):
        solution = optimize.fmin_cobyla(f, x0, cons, iprint=0, **args)

    if solution is None:
        # Previously this fell through to an unbound local variable.
        raise ValueError("cons must be a function, a non-empty list of "
                         "functions, or a non-empty list of intervals")
    return vector(RDF, solution)
reg = (lamda / (2 * X.shape[0]) * np.sum(np.power(theta[1:], 2))) # 不对theta_0做归正则化 return np.sum(first - second) / X.shape[0] + reg # print(cost_R(theta, X, y, 1)) def gradient_R(theta, X, y, lamda): iter_ = theta.shape[0] grad = np.zeros(iter_) for j in range(iter_): term = (sigmoid(X @ theta) - y) * X[:, j] if j == 0: grad[j] = np.sum(term) / X.shape[0] else: grad[j] = np.sum(term) / X.shape[0] + (lamda / X.shape[0]) * theta[j] return grad # print(gradient_R(theta, X, y, lamda)) result2 = opt.fmin_tnc(func=cost_R, x0=theta, fprime=gradient_R, args=(X, y, lamda)) # print(result2) theta_final = np.array(result2[0]) predictions = predict(theta_final, X) correct = [1 if a == b else 0 for (a, b) in zip(predictions, y)] accuracy = sum(correct) / len(correct) print('准确率为 %s %%' % (accuracy * 100)) # z注意格式化表达 from sklearn import linear_model # 调用sklearn的线性回归包 model = linear_model.LogisticRegression(penalty='l2', C=1.0) model.fit(X, y) print(model.score(X,y))
plt.show()

sizeofFeaturesfromFile = len(FeaturesExtrcatedFromFile)
# One parameter per feature column plus one for the intercept; shape[1]
# gives the column count without requiring a second row to exist.
FeaturesArrayures = FeaturesExtrcatedFromFile.shape[1] + 1
# Prepend the column of ones that multiplies the intercept.
FeaturesExtrcatedFromFile = np.append(
    np.ones((FeaturesExtrcatedFromFile.shape[0], 1)),
    FeaturesExtrcatedFromFile,
    axis=1)
Theta = np.zeros(FeaturesArrayures)

result = opt.fmin_tnc(func=CostOfTheClassification,
                      x0=Theta,
                      fprime=LogisticgradientDescent,
                      args=(FeaturesExtrcatedFromFile,
                            LabelsOftheDataExtractedFromFile))
OptTheta = np.matrix(result[0])
optiCost = CostOfTheClassification(OptTheta, FeaturesExtrcatedFromFile,
                                   LabelsOftheDataExtractedFromFile)

# Classify a single example (leading 1 is the intercept term).
test = np.matrix([1, 45, 85])
ResultsOftheClassificationByLogisticRegression(OptTheta, test)

df = pd.read_csv('ex2data1.txt', names=['Exam1', 'Exam2', 'Classes'])
# DataFrame.as_matrix was removed from pandas; selecting the columns and
# taking .values returns the same array.
FeaturesExtrcatedFromFile = df[['Exam1', 'Exam2']].values
data.drop('Test 1', axis=1, inplace=True)
data.drop('Test 2', axis=1, inplace=True)

# set X and y (remember from above that we moved the label to column 0)
cols = data.shape[1]
X = data.iloc[:, 1:cols]
y = data.iloc[:, 0:1]

# convert to numpy arrays and initialize the parameter array theta
X = np.array(X.values)
y = np.array(y.values)
theta = np.zeros(11)

# NOTE(review): passed as the last argument of costReg / gradientReg --
# presumably the regularization strength rather than a learning rate; confirm.
learningRate = 1

result = opt.fmin_tnc(func=costReg, x0=theta, fprime=gradientReg,
                      args=(X, y, learningRate))
print(costReg(theta, X, y, learningRate))  # cost at the initial theta
print(result)

theta_min = np.matrix(result[0])
predictions = predict(theta_min, X)
correct = [
    1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0
    for (a, b) in zip(predictions, y)
]
# Percentage of correct predictions. The original used ``%`` (modulo), which
# yields the raw hit count, or 0 when every prediction is right.
accuracy = round(100.0 * sum(correct) / len(correct), 2)
print('accuracy = {0}%'.format(accuracy))
grad_test = gradient(test_theta, X, y)
print('Cost at test theta: \n', cost_test)
print('Expected cost (approx): 0.218\n')
print('Gradient at test theta: \n', grad_test)
print('Expected gradients (approx):\n 0.043\n 2.566\n 2.647\n')

## ============= Part 3: Optimizing with fmin_tnc =============
#  SciPy's fmin_tnc plays the role of the exercise's fminunc and searches
#  for the optimal parameters theta.

#  fmin_tnc returns three elements: the solution (the optimized theta), the
#  number of function evaluations, and a return code.
result = opt.fmin_tnc(func=costFunction,
                      x0=theta,
                      fprime=gradient,
                      args=(X, y.flatten()))
rc = result[2]
# Return codes 0 (local minimum), 1 (function value converged) and 2 (x
# converged) all mean success; only other codes abort. The original exited
# on any non-zero code, including the two convergence codes.
if rc not in (0, 1, 2):
    raise SystemExit(rc)

thetaOpt = result[0]
print(thetaOpt)
costOpt = costFunction(thetaOpt[:, np.newaxis], X, y)

# Print theta to screen
print('Cost at theta found by fminunc: \n', costOpt)
print('Expected cost (approx): 0.203\n')
print('theta: \n', thetaOpt)
print('Expected theta (approx):\n')
print(' -25.161\n 0.206\n 0.201\n')
def gradient(theta, X, y):
    """Return the gradient as a 1 x n ``np.matrix``.

    Entry ``i`` is ``sum(error * X[:, i]) / len(X)`` with
    ``error = sigmod(X * theta.T) - y``.
    """
    theta_row = np.matrix(theta)
    features = np.matrix(X)
    labels = np.matrix(y)

    n_params = int(theta_row.shape[1])
    residual = sigmod(features * theta_row.T) - labels

    grad = np.matrix(np.zeros(n_params))
    for col in range(n_params):
        weighted = np.multiply(residual, features[:, col])
        grad[0, col] = np.sum(weighted) / len(features)
    return grad


#print(gradient(theta,X,y))
import scipy.optimize as opt

result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(X, y))
# func   -- the function to minimize
# x0     -- starting point of the minimization
# fprime -- gradient of func
# args   -- tuple of extra arguments passed on to func and fprime

# Hand-written gradient descent kept for comparison with the call above:
# def grdientdescent(X,y,theta,alpha,iters):
#     theta = np.matrix(theta)
#     X = np.matrix(X)
#     y = np.matrix(y)
#     temp=np.matrix(np.zeros(X.shape[1]))
#     parameters=X.shape[1]
#     for i in range(iters):
#         error=sigmod(X*theta.T)-y
#         for j in range(parameters):
#             term=np.multiply(error,X[:,j])
#             temp[0,j]=theta[0,j]-alpha*(1/len(X))*np.sum(term)
#         theta=temp
decision = h_val[np.abs(h_val['hval'] < 2 * 10**-3)] # 这一步又是什么 return decision.x1, decision.x2 if __name__ == '__main__': path = 'ex2data2.txt' degree = 6 data = pd.read_csv(path, header=None, names=['Test1', 'Test2', 'Accepted']) # print(data.head()) dt = data.copy() # plotData(data) data.insert(3, 'Ones', 1) mapFeature(data['Test1'], data['Test2']) # print(data.head()) # 整理出数据 cols = data.shape[1] X = data.iloc[:, 1:cols] y = data.iloc[:, 0:1] theta = np.zeros(cols - 1) # 转换 X = X.values y = y.values print(X.shape, y.shape, theta.shape) # 搞清楚矩阵的维度关系真的非常重要 print(np.mat(theta).shape) lam = 1 # c = cost(theta, X, y, lam) # print(c) result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(X, y, lam)) print(result) plotData(dt)
def INLACauchy_log_Laplace(genotype, phenotype, theta1, theta2, v, lam,
                           int_type, gamma):
    '''
    Evaluate the log Laplace term and the MAP estimates it is built on.

    int_type selects what is optimised:
      int_type = 1  : numerator, integrating over the heteroskedastic
                      parameter alpha (alpha bounded below by 1e-14)
      int_type = 0  : denominator, log alpha set to 0 (alpha_hat = 1), so
                      only sigma and beta0 are optimised
      int_type = -1 : alpha is optimised with lower bound 1 and sigma with
                      lower bound 1e-5
      anything else : alpha is optimised without bounds

    Returns the list
      [log_laplace_term, alpha_hat, beta0_hat, beta_hat, sigma_hat]
    --------------------------------------------------------------------------
    Depends on: INLACauchy_h (objective), INLACauchy_hprime (its first-order
    derivative, passed to the optimiser as fprime) and INLACauchy_h_hess
    (Hessian term evaluated at the optimum).
    '''
    N = len(phenotype)
    params = [phenotype, genotype, theta1, theta2, v, lam, N, int_type, gamma]

    def _objective(x):
        return INLACauchy_h(x, params)

    def _objective_grad(x):
        return INLACauchy_hprime(x, params)

    if int_type != 0:
        # Three free parameters: alpha, sigma, beta0.
        if int_type == 1:
            alpha_bounds = (1e-14, None)  # tiny offset against numerical errors
        elif int_type == -1:
            alpha_bounds = (1., None)
        else:
            alpha_bounds = (None, None)
        sigma_floor = 1e-5 if int_type == -1 else 1e-12

        ans = optimize.fmin_tnc(
            _objective, [1., 1., 0.],
            fprime=_objective_grad,
            bounds=(alpha_bounds, (sigma_floor, None), (None, None)),
            epsilon=1e-5, disp=False)
        alpha_hat, sigma_hat, beta0_hat = ans[0]
        map_point = [alpha_hat, sigma_hat, beta0_hat]
        d = 3.
    else:
        # Two free parameters: sigma, beta0; alpha is pinned to 1.
        ans = optimize.fmin_tnc(
            _objective, [1., 1e-8],
            fprime=_objective_grad,
            bounds=((1e-5, None), (None, None)),
            epsilon=1e-5, disp=False)
        sigma_hat, beta0_hat = ans[0]
        alpha_hat = 1.
        map_point = [sigma_hat, beta0_hat]
        d = 2.

    # Objective and Hessian term at the MAP estimates.
    evaluate_h = INLACauchy_h(map_point, params)
    evaluate_hess = INLACauchy_h_hess(map_point, params)

    # Closed-form beta_hat from the alpha-weighted genotype sums.
    weights = [alpha_hat**g for g in genotype]
    S2 = sum([(g**2) * w for g, w in zip(genotype, weights)])
    S1 = sum([g * w for g, w in zip(genotype, weights)])
    Q1 = sum([g * p * w for g, p, w in zip(genotype, phenotype, weights)])
    beta_hat = 1. / (v + 1. / sigma_hat * S2) * 1. / sigma_hat * (
        Q1 - beta0_hat * S1)

    # d is the number of optimised parameters (3 or 2).
    log_laplace_term = (- N * evaluate_h) + d/2. * np.log(2*np.pi) - \
        0.5 * np.log(abs(evaluate_hess)) - d/2. * np.log(N)
    return [log_laplace_term, alpha_hat, beta0_hat, beta_hat, sigma_hat]
# Sanity check: regularised cost and gradient at test_theta with lambda = 10.
# The label names test_theta because that is what is evaluated; the expected
# value 3.16 could not come from an all-zero theta.
lambda_temp = 10
cost = costFunctionReg(test_theta, X, y, lambda_temp)
print('Cost at test theta (with lambda = 10): ' + str(cost))
print('Expected cost (approx): 3.16\n')

grad = gradientReg(test_theta, X, y, lambda_temp)
# Print the heading first, then the values it announces.
print('Gradient at test theta - first five values only:\n')
for i in range(5):
    print(np.round(grad[i], 4))
print('Expected gradients (approx) - first five values only:\n')
print(' 0.3460\n 0.1614\n 0.1948\n 0.2269\n 0.0922\n')

# Fit the parameters with the library optimiser, now with lambda = 1.
lambda_temp = 1
result = opt.fmin_tnc(func=costFunctionReg1,
                      x0=initial_theta,
                      fprime=gradientReg1,
                      args=(X, y, lambda_temp))
print(result)

# result[0] is the optimised parameter vector; keep it both as a column
# (theta) and as a row (theta_min, the form passed to predict).
theta = np.mat(result[0]).T
theta_min = np.mat(result[0])
predictions = predict(theta_min, X)
print(classification_report(y, predictions))

# A prediction counts as correct when it and the label are both 1 or both 0.
correct = [
    1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0
    for (a, b) in zip(predictions, y)
]
accuracy = sum(map(int, correct)) / len(correct) * 100
print('accuracy = {:.2f}%'.format(accuracy))
# Optimisation routines from the scipy.optimize library.
from scipy.optimize import minimize, fmin_tnc, fmin, fmin_bfgs, fmin_ncg, leastsq, fmin_slsqp

# 1) Generic minimize() front end, TNC method, gradient supplied through jac.
Result = minimize(CalculoCusto,
                  initial_theta,
                  args=(X, Y, m, n),
                  method='TNC',
                  jac=Gradient)
optJ = Result.fun
optTheta = Result.x
print(
    'Com minimize de scipy.optimize se chega a um custo optJ de {0} e optTheta {1}'
    .format(optJ, optTheta))

# 2) fmin_tnc called directly, gradient supplied through fprime.
Result = fmin_tnc(CalculoCusto,
                  initial_theta,
                  fprime=Gradient,
                  args=(X, Y, m, n))
tncTheta = Result[0]
print(
    'Com fmin_tnc de scipy.optimize se chega a tncTheta {0}'.format(tncTheta))

# 3) Same call without a gradient function (no fprime): approx_grad=True
#    is passed instead.
Result = fmin_tnc(CalculoCusto,
                  initial_theta,
                  approx_grad=True,
                  args=(X, Y, m, n))
tncTheta = Result[0]
print(
    'Com fmin_tnc de scipy.optimize, SEM PASSAR A FCT GRADIENT, se chega a tncTheta {0}'
    .format(tncTheta))
# NOTE(review): the next two statements are the tail of a function whose
# header lies outside this chunk (presumably cost_gradient, given the call
# sites below). It returns the cost J together with its gradient.
    # gradient_init plus (1/m) * X^T (i - y); `i` is defined in the part of
    # the function that is not visible here.
    gradient = gradient_init + ((1 / m) * (np.dot((np.transpose(X)), ((i) - y))))
    return J, gradient
###############################################################################
###############################################################################
# Product of X and the initial parameters.
z = (np.dot(X, initial_theta))
J, gradient = cost_gradient(initial_theta, X, y) #FOR INITIAL THETA
###############################################################################
#--------------------------------OPTIMIZATION----------------------------------
###############################################################################
import scipy.optimize as opt
# No fprime is passed and approx_grad is left at its default, so fmin_tnc
# takes both the value and the gradient from func's return value -- which is
# why cost_gradient is unpacked as a (J, gradient) pair at its call sites.
result = opt.fmin_tnc(func=cost_gradient, x0=initial_theta, args=(X, y))
# First element of the fmin_tnc result: the optimised parameter vector.
optimal_theta = result[0]
J, gradient = (cost_gradient(optimal_theta, X, y)) #FOR OPTIMAL THETA
###############################################################################
###############################################################################
# Plotting helpers defined elsewhere in the file.
viewdata(data)
decision_boundary(optimal_theta, X, y)
# NOTE(review): the statements up to `return grad` are the tail of a function
# whose header lies outside this chunk (presumably lrGradientDescent, which
# is passed as fprime below).
    m = len(y)
    # This zero array is overwritten by the very next statement.
    grad = np.zeros([m, 1])
    # (1/m) * X^T (sigmoid(X theta) - y)
    grad = (1 / m) * X.T @ (sigmoid(X @ theta) - y)
    # Regularisation term, currently disabled:
    #grad[1:] = grad[1:] + (lambda_t / m) * theta[1:]
    return grad

# m, n: number of rows and columns of X.
(m, n) = X.shape
# Turn y into a column and start from an all-zero (n, 1) theta.
y = y[:, np.newaxis]
theta = np.zeros((n, 1))
J = lrCostFunction(theta, X, y)
print(J)
# The optimiser is handed flattened theta and y.
output = opt.fmin_tnc(func = lrCostFunction, x0 = theta.flatten(), fprime = lrGradientDescent, \
                      args = (X, y.flatten()))
theta = output[0]
print(theta) # theta contains the optimized values
J = lrCostFunction(theta, X, y)
print(J)
# Predict True wherever sigmoid(X theta) >= 0.5 (wrapped in a one-element list).
pred = [sigmoid(np.dot(X, theta)) >= 0.5]
# Percentage of predictions equal to the labels. NOTE(review): the value is
# not stored or printed, so it is only visible in an interactive session.
np.mean(pred == y.flatten()) * 100
# 50 x 50 grid over [-1, 1.5] in both directions, with z initialised to zeros.
u = np.linspace(-1, 1.5, 50)
v = np.linspace(-1, 1.5, 50)
z = np.zeros((len(u), len(v)))

# NOTE(review): the body of this function continues past the end of this
# chunk; only its first statement is visible here.
def mapFeatureForPlotting(X1, X2):
    degree = 6