def MLE_marginal_iteration_constrain_paired(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length,psi1_last,psi2_last,var1_last,var2_last,rho_last,beta_0_last,beta_1_last,current_sum_last): #initial value rho=rho_last; var1=var1_last;var2=var2_last; psi1=psi1_last;psi2=psi2_last; beta_0=beta_0_last;beta_1=beta_1_last; current_sum=current_sum_last; #MLE of the marginal likelihood iter_cutoff=1;iter_maxrun=40;count=0;previous_sum=1;likelihood_sum=0; #print('psi');print(psi1);print(psi2); if (sum(psi1)/len(psi1))>(sum(psi2)/len(psi2)):#minize psi2 if this is the case xopt = fmin_l_bfgs_b(myfunc_marginal_1_paired,[beta_1],myfunc_marginal_1_der_paired,args=[i1,s1,psi1,var1,i2,s2,psi2,var2,effective_inclusion_length,effective_skipping_length,rho],bounds=[[pow(10,-2),1-pow(10,-2)-cutoff]],iprint=-1) beta_1 = max(min(float(xopt[0][0]),1-cutoff),0);beta_0=beta_1+cutoff; current_sum=float(xopt[1]); #rho=xopt[0][1]; else:#minize psi1 if this is the case xopt = fmin_l_bfgs_b(myfunc_marginal_2_paired,[beta_0],myfunc_marginal_2_der_paired,args=[i1,s1,psi1,var1,i2,s2,psi2,var2,effective_inclusion_length,effective_skipping_length,rho],bounds=[[pow(10,-2),1-pow(10,-2)-cutoff]],iprint=-1) beta_0 = max(min(float(xopt[0][0]),1-cutoff),0);beta_1=beta_0+cutoff; current_sum=float(xopt[1]); #rho=xopt[0][1]; print('constrain_MLE_marginal_xopt_mean');print(xopt);print(beta_0);print(beta_1); # if xopt[2]['warnflag']!=0: # return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2,rho,0]]); # else: return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2,rho,1]]);
def fit_predict(self, X, full_series=False):
    if not isinstance(X, TimeSeriesDataset):
        X = np.asanyarray(X, dtype='d')
    else:
        X = X.np_like_firstn()

    num_params = 5
    init_params = np.random.rand(num_params)
    init_params[-1] = 1e-20

    n, ticks = X.shape
    if not full_series:
        Y = np.zeros(shape=(n, self.steps_ahead), dtype='d')
    else:
        Y = np.zeros(shape=(n, ticks + self.steps_ahead), dtype='d')

    bounds = [(None, None), (None, None), (None, None), (None, None),
              (0, None)]
    for i in xrange(X.shape[0]):
        model = Model(self.normalize_err)
        fmin_l_bfgs_b(func=model, x0=init_params, approx_grad=True,
                      bounds=bounds, args=(X[i], self.trend, self.period),
                      iprint=-1)
        for _ in xrange(self.n_walks):
            y = np.asarray(model.walk(self.steps_ahead, full_series))
            Y[i] += y
    Y /= self.n_walks
    return Y
def _bl_lbfgs_step(X_left, X_right, y, U, V, alpha=0.1, tol=1e-6,
                   max_iter=1, verbose=False):
    u_new, loss, info = optimize.fmin_l_bfgs_b(
        _bilinear_loss_grad, U.ravel(), fprime=None,
        args=(V, X_left, X_right, y, alpha),
        iprint=(verbose > 0) - 1, pgtol=tol, maxiter=max_iter)
    U_new = u_new.reshape(U.shape)

    v_new, loss, info = optimize.fmin_l_bfgs_b(
        _bilinear_loss_grad, V.ravel(), fprime=None,
        args=(U_new, X_right, X_left, y, alpha),
        iprint=(verbose > 0) - 1, pgtol=tol, maxiter=max_iter)
    V_new = v_new.reshape(V.shape)

    return loss, U_new, V_new
def new_target(self, uav, noisy_states):
    ogrid = [np.linspace(*dim, num=res) for dim, res in zip(
        self.get_effective_area(), self.grid_resolution)]
    x, y, z = meshgrid_nd(*ogrid)
    acq = self.acquisition_fn(
        np.column_stack((x.flat, y.flat, z.flat)), uav, noisy_states)
    max_idx = np.unravel_index(np.argmax(acq), x.shape)
    x0 = np.array([x[max_idx], y[max_idx], z[max_idx]])
    x, val, unused = fmin_l_bfgs_b(
        NegateFn(self.acquisition_fn).eval_with_derivative, x0,
        args=(uav, noisy_states,), bounds=self.get_effective_area(),
        pgtol=1e-10, factr=1e2)

    idx = np.argmax(self.acquisition_fn.predictor.y_train.data)
    x0 = np.asarray(noisy_states[uav].position)
    for dx in 5 * rnd.randn(5):
        x2, val2, unused = fmin_l_bfgs_b(
            NegateFn(self.acquisition_fn).eval_with_derivative, x0 + dx,
            args=(uav, noisy_states,), bounds=self.get_effective_area(),
            pgtol=1e-10, factr=1e2)
        if val2 < val:
            x = x2

    idx = np.argmax(self.acquisition_fn.predictor.y_train.data)
    x0 = self.acquisition_fn.predictor.x_train.data[idx]
    for dx in 5 * rnd.randn(5):
        x2, val2, unused = fmin_l_bfgs_b(
            NegateFn(self.acquisition_fn).eval_with_derivative, x0 + dx,
            args=(uav, noisy_states,), bounds=self.get_effective_area(),
            pgtol=1e-10, factr=1e2)
        if val2 < val:
            x = x2

    return [x]
def test_minimize_l_bfgs_b_maxfun_interruption(self):
    # gh-6162
    f = optimize.rosen
    g = optimize.rosen_der
    values = []
    x0 = np.ones(7) * 1000

    def objfun(x):
        value = f(x)
        values.append(value)
        return value

    # Look for an interesting test case.
    # Request a maxfun that stops at a particularly bad function
    # evaluation somewhere between 100 and 300 evaluations.
    low, medium, high = 30, 100, 300
    optimize.fmin_l_bfgs_b(objfun, x0, fprime=g, maxfun=high)
    v, k = max((y, i) for i, y in enumerate(values[medium:]))
    maxfun = medium + k
    # If the minimization strategy is reasonable,
    # the minimize() result should not be worse than the best
    # of the first 30 function evaluations.
    target = min(values[:low])
    xmin, fmin, d = optimize.fmin_l_bfgs_b(f, x0, fprime=g, maxfun=maxfun)
    assert_array_less(fmin, target)
def MLE_iteration_constrain(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length): psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length); iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0; while((iter_cutoff>0.01)&(count<=iter_maxrun)): count+=1; #iteration of beta beta_0=sum(psi1)/len(psi1); beta_1=sum(psi2)/len(psi2); var1=0;var2=0; current_sum=0;likelihood_sum=0; new_psi1=[];new_psi2=[]; if (sum(psi1)/len(psi1))>(sum(psi2)/len(psi2)):#minize psi2 if this is the case xopt = fmin_l_bfgs_b(myfunc_1,[sum(psi2)/len(psi2)],myfunc_der_1,args=[[i1[0],i2[0]],[s1[0],s2[0]],[beta_0,beta_1],var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.001,0.999-cutoff]],iprint=-1) theta2 = max(min(float(xopt[0]),1-cutoff),0);theta1=theta2+cutoff; else:#minize psi1 if this is the case xopt = fmin_l_bfgs_b(myfunc_2,[sum(psi1)/len(psi1)],myfunc_der_2,args=[[i1[0],i2[0]],[s1[0],s2[0]],[beta_0,beta_1],var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.001,0.999-cutoff]],iprint=-1) theta1 = max(min(float(xopt[0]),1-cutoff),0);theta2=theta1+cutoff; #Debug;print('constrain_1xopt');print('theta');print(theta1);print(theta2);print(xopt); current_sum+=float(xopt[1]); new_psi1.append(theta1);new_psi2.append(theta2); psi1=new_psi1;psi2=new_psi2; if count>1: iter_cutoff=abs(previous_sum-current_sum)/abs(previous_sum); previous_sum=current_sum; #Debug;print('constrain');print(theta1);print(theta2);print(psi1);print(psi2);print(current_sum);print(likelihood_sum); print('constrain');print(xopt);print(theta1);print(theta2); return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
def test_optimize():
    L = np.max((X * X).sum(1))
    step_size = 1.0 / L
    alpha = 0.

    def logloss(x):
        return logistic._logistic_loss(x, X, y, alpha)

    def fprime_logloss(x):
        return logistic._logistic_loss_and_grad(x, X, y, alpha)[1]

    opt = saga('log', None, X, y, np.zeros(n_features))
    assert opt.success
    sol_scipy = optimize.fmin_l_bfgs_b(
        logloss, np.zeros(n_features), fprime=fprime_logloss)[0]
    np.testing.assert_allclose(sol_scipy, opt.x, rtol=1e-1)

    def squaredloss(w):
        return 0.5 * ((y - np.dot(X, w)) ** 2).sum() + 0.5 * alpha * w.dot(w)

    def fprime_squaredloss(w):
        return - X.T.dot(y - np.dot(X, w)) + alpha * w

    opt = saga('squared', None, X, y, np.zeros(n_features))
    assert opt.success
    print(fprime_squaredloss(opt.x))
    sol_scipy = optimize.fmin_l_bfgs_b(
        squaredloss, np.zeros(n_features), fprime=fprime_squaredloss)[0]
    print(fprime_squaredloss(sol_scipy))
    np.testing.assert_allclose(sol_scipy, opt.x, rtol=1e-1)
def __updateD(X, A, D, R, nne, optfunc):
    f = 0
    for i in range(len(X)):
        d = D[i, :]
        u = Updater(X[i], A, R)
        if nne > 0:
            bounds = len(d) * [(0, None)]
            res = fmin_l_bfgs_b(
                u.updateD_F, d, u.updateD_G, factr=1e12, bounds=bounds
            )
        else:
            if optfunc == 'lbfgs':
                res = fmin_l_bfgs_b(u.updateD_F, d, u.updateD_G, factr=1e12)
                D[i, :] = res[0]
                f += res[1]
            elif optfunc == 'ncg':
                res = fmin_ncg(
                    u.updateD_F, d, u.updateD_G, fhess=u.updateD_H,
                    full_output=True, disp=False
                )
                # TODO: check return value of ncg and update D, f
                raise NotImplementedError()
            elif optfunc == 'tnc':
                res = fmin_tnc(u.updateD_F, d, u.updateD_G, disp=False)
                # TODO: check return value of tnc and update D, f
                raise NotImplementedError()
    return D, f
def run(M, S, ctrl_pts, scales, lambdas, alphas, betas, iters, method, criterion):
    [n, d] = ctrl_pts.shape
    x0 = init_param(n, d)
    [basis, kernel] = prepare_basis(M, ctrl_pts, method)
    if criterion == 'L2':
        x = fmin_l_bfgs_b(obj_L2_TPS, x0, None,
                          args=(basis, kernel, S, scales, lambdas),
                          maxfun=iters)
        x0 = x[0]
    if criterion == 'KC':
        x = fmin_l_bfgs_b(obj_KC_TPS, x0, None,
                          args=(basis, kernel, S, scales, alphas, betas),
                          maxfun=iters)
        x0 = x[0]
    after_tps = transform_points(x0, basis)
    return after_tps
def fit(self, X, Y):
    # save data and labels for plotting methods
    self.data = X
    self.labels = Y
    # get number of observations, features from data
    self.n_obs, self.n_features = X.shape
    # now make the weights attribute
    self.weights = np.random.rand(self.n_features)
    self.weights_history.append(self.weights)
    # use the scipy optimize L-BFGS-B method
    optimize.fmin_l_bfgs_b(self.cost, self.weights,
                           fprime=self.gradient, args=(X, Y))
def pre_train(train_data, train_labels): global input_size, num_labels, hidden_size_l1, hidden_size_l2, ro, lbda, beta levels_l1=[input_size, hidden_size_l1, input_size] print 'Step 1:', time.time() # stack 1 init_theta = initialization(levels_l1) try: theta = np.genfromtxt("theta_l1.txt") print theta.shape except: theta, cost, info = fmin_l_bfgs_b(J, init_theta, args=(train_data, train_data, levels_l1, lbda, ro, beta), approx_grad=False, maxiter=400) np.savetxt("theta_l1.txt", theta) print "cost:", cost print "info:", info WB_l1 = vec2mat(theta, levels_l1) sio.savemat('W_l1', {'W_l1':WB_l1[0][0]}) print 'Step 2:', time.time() # stack 2 levels_l2 = [hidden_size_l1, hidden_size_l2, hidden_size_l1] train_l1_a2 = feed_forward(WB_l1[0], train_data, hidden_size_l1) init_theta = initialization(levels_l2) try: theta = np.genfromtxt("theta_l2.txt") print theta.shape except: theta, cost, info = fmin_l_bfgs_b(J, init_theta, args=(train_l1_a2, train_l1_a2, levels_l2, lbda, ro, beta), approx_grad=False, maxiter=400) np.savetxt("theta_l2.txt", theta) print "cost:", cost print "info:", info WB_l2 = vec2mat(theta, levels_l2) sio.savemat('W_l2', {'W_l2':WB_l2[0][0]}) print 'Step 3:', time.time() try: sr_theta = np.genfromtxt("sr_theta.txt") print sr_theta.shape except: train_l2_a2 = feed_forward(WB_l2[0], train_l1_a2, hidden_size_l2) sr_init_theta = sr_train.initialize_theta(hidden_size_l2, num_labels) sr_theta = sr_train.train(sr_init_theta, train_l2_a2, train_labels, num_labels) np.savetxt("sr_theta.txt", sr_theta) sr_mat = sr_vec2mat(sr_theta, num_labels) return WB_l1, WB_l2, sr_mat
def MLE_iteration_constrain(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length): psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length); iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0; beta_0=sum(psi1)/len(psi1); beta_1=sum(psi2)/len(psi2); var1=10*scipy.var(numpy.array(psi1)-beta_0); var2=10*scipy.var(numpy.array(psi2)-beta_1); if var1<=0.01: var1=0.01; if var2<=0.01: var2=0.01; print('var1');print(var1);print('var2');print(var2); while((iter_cutoff>0.01)&(count<=iter_maxrun)): count+=1; #iteration of beta beta_0=sum(psi1)/len(psi1); beta_1=sum(psi2)/len(psi2); print('var1');print(var1);print('var2');print(var2); #if abs(sum(psi1)/len(psi1)-sum(psi2)/len(psi2))>cutoff: if (sum(psi1)/len(psi1))>(sum(psi2)/len(psi2)):#minize psi2 if this is the case xopt = fmin_l_bfgs_b(myfunc_1,[sum(psi2)/len(psi2)],myfunc_der_1,args=[psi1,psi2,var1,var2],bounds=[[0.001,0.999-cutoff]],iprint=-1) theta2 = max(min(float(xopt[0]),1-cutoff),0);theta1=theta2+cutoff; else:#minize psi1 if this is the case xopt = fmin_l_bfgs_b(myfunc_2,[sum(psi1)/len(psi1)],myfunc_der_2,args=[psi1,psi2,var1,var2],bounds=[[0.001,0.999-cutoff]],iprint=-1) theta1 = max(min(float(xopt[0]),1-cutoff),0);theta2=theta1+cutoff; print('constrain_1xopt');print('theta');print(theta1);print(theta2);print(xopt); #else: # theta1=sum(psi1)/len(psi1);theta2=sum(psi2)/len(psi2); beta_0=theta1;beta_1=theta2; #iteration of psi new_psi1=[];new_psi2=[];current_sum=0;likelihood_sum=0; print('constrain_2xopt'); for i in range(len(psi1)): xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i]],myfunc_individual_der,args=[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1); new_psi1.append(float(xopt[0]));current_sum+=float(xopt[1]);print(xopt); #likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]); for i in range(len(psi2)): xopt = fmin_l_bfgs_b(myfunc_individual,[psi2[i]],myfunc_individual_der,args=[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1); new_psi2.append(float(xopt[0]));current_sum+=float(xopt[1]);print(xopt); #likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]); print('new_psi[0]');print(new_psi1[0]);print(new_psi2[0]); psi1=new_psi1;psi2=new_psi2; print('count');print(count);print('previous_sum');print(previous_sum);print('current_sum');print(current_sum); if count>1: iter_cutoff=abs(previous_sum-current_sum); previous_sum=current_sum; #print('constrain');print(theta1);print(theta2);print(psi1);print(psi2);print(current_sum);print(likelihood_sum); #print(xopt); return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
def fit(self, x, y, weights=None, **kwargs):
    """Train the model.

    x = (Nobs, nvars)
    y = (Nobs, ) = {0, 1}
    Bias term automatically added

    Returns the loss
    **kwargs passed into fmin_l_bfgs_b"""
    from scipy.optimize import fmin_l_bfgs_b

    assert len(y) == x.shape[0]
    assert weights is None or len(weights) == x.shape[0]

    y0 = y == 0
    x0 = x[y0, :]
    x1 = x[~y0, :]
    if weights is None:
        loss_weights = None
    else:
        loss_weights = [weights[y0], weights[~y0]]

    def _loss_for_optimize(params):
        return LogisticRegression._loss_gradient(
            x0, x1, params[0], params[1:], self.lam, loss_weights)

    params0 = np.zeros(1 + x.shape[1])
    params_opt, loss_opt, info_opt = fmin_l_bfgs_b(
        _loss_for_optimize, params0, disp=0, **kwargs)
    print("%s funcalls: %s" % (info_opt['task'], info_opt['funcalls']))

    self.b = params_opt[0]
    self.w = params_opt[1:]
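# A hypothetical usage sketch for fit() above. The data, the lam value, and the
# assumption that LogisticRegression() can be built with no arguments are all
# illustrative; only the fit() signature comes from the snippet itself.
import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(200, 3)                          # (Nobs, nvars)
y = (x[:, 0] + 0.5 * x[:, 1] > 0).astype(int)  # labels in {0, 1}

model = LogisticRegression()   # assumed constructor; fit() only needs self.lam
model.lam = 1.0                # L2 penalty consumed by _loss_gradient
model.fit(x, y, maxfun=500)    # extra keyword args are forwarded to fmin_l_bfgs_b
print(model.b, model.w)        # intercept and weights set by fit()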
def linear(x, fc, alpha=None, beta=None):
    Y = x[:]

    if alpha is None or beta is None:
        initial_values = array([0.3, 0.1])
        boundaries = [(0, 1), (0, 1)]
        type = 'Linear'

        parameters = fmin_l_bfgs_b(RMSE, x0=initial_values, args=(Y, type),
                                   bounds=boundaries, approx_grad=True)
        alpha, beta = parameters[0]

    a = [Y[0]]
    b = [Y[1] - Y[0]]
    y = [a[0] + b[0]]
    rmse = 0

    for i in range(len(Y) + fc):
        if i == len(Y):
            Y.append(a[-1] + b[-1])
        a.append(alpha * Y[i] + (1 - alpha) * (a[i] + b[i]))
        b.append(beta * (a[i + 1] - a[i]) + (1 - beta) * b[i])
        y.append(a[i + 1] + b[i + 1])

    rmse = sqrt(sum([(m - n) ** 2 for m, n in zip(Y[:-fc], y[:-fc - 1])]) / len(Y[:-fc]))

    return Y[-fc:], alpha, beta, rmse
def linear_decoder_run_gpu(data, numInput, numHidden): print "Starting Feature Abstraction..." num_input = numInput num_hidden = numHidden num_output = numInput lambda_val = 3e-3 sparsityParam = 0.035 beta = 5 inputs = data r = gpu.sqrt(6)/gpu.sqrt(num_hidden+num_input+1) weights1 = (gpu.rand(num_hidden,num_input+1))*2*r-r weights2 = (gpu.rand(num_output,num_hidden+1))*2*r-r num_weights1 = (num_input+1)*num_hidden num_weights2 = (num_hidden+1)*num_output #weights1 = reshape(weights1, num_weights1) weights1 = weights1.reshape(num_weights1) #weights2 = reshape(weights2, num_weights2) weights2 = weights2.reshape(num_weights2) weights = hstack((weights1.as_numpy_array(),weights2.as_numpy_array())) args = (num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta) opttheta = optimize.fmin_l_bfgs_b(costfunc_gpu, weights, fprime=grad_costfunc_gpu, args=args, maxiter=400) weights = opttheta[0] weights1 = reshape(weights[0:num_weights1],(num_hidden,num_input+1)) weights2 = reshape(weights[num_weights1:shape(weights)[0]], (num_output,num_hidden+1)) scipy.io.savemat('learntFeaturesGPU.mat', mdict={'learntFeatures': weights1}) return weights1
def opt(self, x_init, f_fp=None, f=None, fp=None):
    """
    Run the optimizer
    """
    rcstrings = ['Converged', 'Maximum number of f evaluations reached',
                 'Error']

    assert f_fp is not None, "BFGS requires f_fp"

    opt_dict = {}
    if self.xtol is not None:
        print("WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it")
    if self.ftol is not None:
        print("WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it")
    if self.gtol is not None:
        opt_dict['pgtol'] = self.gtol
    if self.bfgs_factor is not None:
        opt_dict['factr'] = self.bfgs_factor

    opt_result = optimize.fmin_l_bfgs_b(f_fp, x_init,
                                        maxfun=self.max_iters, **opt_dict)
    self.x_opt = opt_result[0]
    self.f_opt = f_fp(self.x_opt)[0]
    self.funct_eval = opt_result[2]['funcalls']
    self.status = rcstrings[opt_result[2]['warnflag']]

    # a more helpful error message is available in opt_result in the Error case
    if opt_result[2]['warnflag'] == 2:  # pragma: no coverage, this is not needed to be covered
        self.status = 'Error' + str(opt_result[2]['task'])
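# A minimal sketch of the call pattern the wrapper above relies on: when no
# fprime is given and approx_grad is left off, fmin_l_bfgs_b accepts a single
# callable returning (objective, gradient), with gtol mapped to pgtol and the
# BFGS factor mapped to factr. The quadratic below is an illustrative stand-in
# for f_fp, not part of the original code.
import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def f_fp(x):
    f = np.sum((x - 3.0) ** 2)       # objective value
    g = 2.0 * (x - 3.0)              # gradient of the objective
    return f, g

x_opt, f_opt, info = fmin_l_bfgs_b(f_fp, np.zeros(4), maxfun=1000,
                                   pgtol=1e-6, factr=1e7)
print(info['funcalls'], info['warnflag'])   # same fields the wrapper reads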
def autofit_l_bfgs(self, x0):
    prm = self.autofit_prm
    bounds = [(p.min, p.max) for p in self.fitparams]
    from scipy.optimize import fmin_l_bfgs_b
    x, _f, _d = fmin_l_bfgs_b(self.get_norm_func(), x0, pgtol=prm.gtol,
                              approx_grad=1, bounds=bounds)
    return x
def multiplicative(x, m, fc, alpha=None, beta=None, gamma=None):
    Y = x[:]

    if alpha is None or beta is None or gamma is None:
        initial_values = array([0.0, 1.0, 0.0])
        boundaries = [(0, 1), (0, 1), (0, 1)]
        type = 'Multiplicative'

        parameters = fmin_l_bfgs_b(RMSE, x0=initial_values, args=(Y, type, m),
                                   bounds=boundaries, approx_grad=True)
        alpha, beta, gamma = parameters[0]

    a = [sum(Y[0:m]) / float(m)]
    b = [(sum(Y[m:2 * m]) - sum(Y[0:m])) / m ** 2]
    s = [Y[i] / a[0] for i in range(m)]
    y = [(a[0] + b[0]) * s[0]]
    rmse = 0

    for i in range(len(Y) + fc):
        if i == len(Y):
            Y.append((a[-1] + b[-1]) * s[-m])
        a.append(alpha * (Y[i] / s[i]) + (1 - alpha) * (a[i] + b[i]))
        b.append(beta * (a[i + 1] - a[i]) + (1 - beta) * b[i])
        s.append(gamma * (Y[i] / (a[i] + b[i])) + (1 - gamma) * s[i])
        y.append((a[i + 1] + b[i + 1]) * s[i + 1])

    rmse = sqrt(sum([(m - n) ** 2 for m, n in zip(Y[:-fc], y[:-fc - 1])]) / len(Y[:-fc]))

    return Y[-fc:], alpha, beta, gamma, rmse
def main():
    visible_size = 64      # number of input units
    hidden_size = 25       # number of hidden units
    sparsity_param = 0.01  # desired average activation
    lamb = 0.0001          # weight decay parameter
    beta = 3               # weight of sparsity penalty

    # dataset
    # Generate training and testing set
    data_train = np.random.rand(64, 1000)  # 1000 samples, with
                                           # dimensionality of 64
    #data_test = np.random.rand(64, 10)

    # Obtain random parameters concatenation
    theta = initial_parameter(hidden_size, visible_size)

    # Implement sparse autoencoder cost
    options = (data_train, visible_size, hidden_size, lamb,
               sparsity_param, beta)
    opttheta = optimize.fmin_l_bfgs_b(compute_cost, theta,
                                      compute_grad, options)

    # Output a .txt file
    fout = open("weights_bias.txt", 'w')
    for op in opttheta[0]:
        #print i
        fout.write(str(op) + '\n')
    print "Mission complete!"
def test_l_bfgs_b(self, use_wrapper=False):
    """ limited-memory bound-constrained BFGS algorithm """
    if use_wrapper:
        opts = {'maxfev': self.maxiter}
        retval = optimize.minimize(self.func, self.startparams,
                                   method='L-BFGS-B', args=(),
                                   jac=self.grad, options=opts,
                                   full_output=True)
        params = retval[0]
    else:
        retval = optimize.fmin_l_bfgs_b(self.func, self.startparams,
                                        self.grad, args=(),
                                        maxfun=self.maxiter)
        (params, fopt, d) = retval

    err = abs(self.func(params) - self.func(self.solution))
    #print "LBFGSB: Difference is: " + str(err)
    assert_(err < 1e-6)

    # Ensure that function call counts are 'known good'; these are from
    # Scipy 0.7.0. Don't allow them to increase.
    assert_(self.funccalls == 7, self.funccalls)
    assert_(self.gradcalls == 5, self.gradcalls)

    # Ensure that the function behaves the same; this is from Scipy 0.7.0
    assert_(np.allclose(self.trace[3:5],
                        [[0., -0.52489628, 0.48753042],
                         [0., -0.52489628, 0.48753042]],
                        atol=1e-14, rtol=1e-7), self.trace[3:5])
def test_l_bfgs_b_numjac(self):
    # L-BFGS-B with numerical jacobian
    retval = optimize.fmin_l_bfgs_b(self.func, self.startparams,
                                    approx_grad=True,
                                    maxiter=self.maxiter)

    (params, fopt, d) = retval

    assert_allclose(self.func(params), self.func(self.solution),
                    atol=1e-6)
def train(self, A=None):
    """
    Train an MLLT transform from an optional starting point.

    @param A: Initial MLLT matrix to start training.
    @type A: numpy.ndarray
    @return: Optimized MLLT transformation matrix
    @rtype: numpy.ndarray
    """
    if A is None:
        # Initialize it with a random positive-definite matrix of
        # the same shape as the covariances
        s = self.cov[0].shape
        d = -1
        while d < 0:
            # A = eye(s[0]) + 0.1 * random(s)
            A = eye(s[0])
            d = det(A)
    # Flatten out the matrix so scipy.optimize can handle it
    AA, f, d = fmin_l_bfgs_b(self.objective, A.ravel(),
                             args=A.shape, factr=10)
    if d['warnflag']:
        print "WARNING! MLLT optimization failed to converge"
    # Unflatten the return matrix
    return AA.reshape(A.shape)
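# A small self-contained illustration of the flatten-then-reshape pattern used
# in train() above: fmin_l_bfgs_b only works on 1-D parameter vectors, so a
# matrix is passed as A.ravel() and rebuilt with reshape(). The Frobenius-norm
# objective below is an illustrative stand-in for the MLLT objective.
import numpy as np
from scipy.optimize import fmin_l_bfgs_b

target = np.array([[2.0, 0.0], [0.0, 3.0]])

def objective(a_flat, nrow, ncol):
    A = a_flat.reshape(nrow, ncol)
    diff = A - target
    f = 0.5 * np.sum(diff ** 2)      # scalar objective
    g = diff.ravel()                 # gradient, flattened to match a_flat
    return f, g

A0 = np.eye(2)
a_opt, f_opt, info = fmin_l_bfgs_b(objective, A0.ravel(), args=A0.shape)
A_opt = a_opt.reshape(A0.shape)      # recover the matrix form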
def MLE_iteration(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length): psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length); iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0; while((iter_cutoff>0.01)&(count<=iter_maxrun)): count+=1; #iteration of beta beta_0=sum(psi1)/len(psi1); beta_1=sum(psi2)/len(psi2); var1=0;var2=0; current_sum=0;likelihood_sum=0; new_psi1=[];new_psi2=[]; #Debug;print('unconstrain_1xopt'); for i in range(len(psi1)): xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i],psi2[i]],myfunc_individual_der,args=[[i1[i],i2[i]],[s1[i],s2[i]],[beta_0,beta_1],var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99],[0.01,0.99]],iprint=-1); new_psi1.append(float(xopt[0][0]));current_sum+=float(xopt[1]); new_psi2.append(float(xopt[0][1])); #Debug;print(xopt); likelihood_sum+=myfunc_likelihood([new_psi1[i],new_psi2[i]],[[i1[i],i2[i]],[s1[i],s2[i]],[beta_0,beta_1],var1]); psi1=new_psi1;psi2=new_psi2; #Debug;print('count');print(count);print('previous_sum');print(previous_sum);print('current_sum');print(current_sum); if count>1: iter_cutoff=abs(previous_sum-current_sum)/abs(previous_sum); previous_sum=current_sum; if count>iter_maxrun: return([current_sum,[psi1,psi2,0,0,var1,var2]]); print('unconstrain');print(xopt); return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
def fit_full_curve(self):
    # main optimization
    if self.classic_curve_fitted == True:
        #R_T_init = (1.0/(self.plsq[0][0]*self.N)) # OLD # 23.05.14 ADM
        R_T_init = (1.0 / (self.plsq[0][0]))  # NEW
        # tunnel resistance of the single junction
        C_sigma_init = e**2 / (self.plsq[0][2] * k) * 1e15
        T_p_init = self.plsq[0][1] * 1e3
    else:
        R_T_init = self.R_tunnel_init
        C_sigma_init = e**2 / (self.TEC_init * k) * 1e15
        T_p_init = self.T_init * 1e3

    island_volume_init = self.island_size

    x1 = [R_T_init, C_sigma_init, T_p_init]

    if self.bounds == None:
        self.xopt1 = optimize.fmin_bfgs(self.optimize_1, x1, gtol=1e-3,
                                        full_output=1, disp=1,
                                        callback=self.call_func)
    else:
        print "Optimizing with bounds"
        self.xopt1 = optimize.fmin_l_bfgs_b(self.optimize_1, x1, factr=1e7,
                                            approx_grad=True,
                                            bounds=self.bounds)

    toc = time.clock()
    print "=========================================="
    print "======  After main optimization:    ======"
    print "=========================================="
    # 28.05.14 ADM
    print "R_T = %g" % (self.xopt1[0][0])
    print "T = %g mK" % (self.xopt1[0][2])
    print "C_sigma = %g " % (self.xopt1[0][1])

    self.full_curve_fitted = True
    self.T_fit = self.xopt1[0][2]
    self.R_T = self.xopt1[0][0]
    self.C_sigma = self.xopt1[0][1]
def optimize_posterior_mean_and_std(model, X_lower, X_upper,
                                    startpoints=None, with_gradients=True):

    def f(x):
        mu, var = model.predict(x[np.newaxis, :])
        return (mu + np.sqrt(var))

    def df(x):
        dmu, dvar = model.predictive_gradients(x[np.newaxis, :])
        _, var = model.predict(x[np.newaxis, :])
        std = np.sqrt(var)
        # To get the gradients of the standard deviation
        # we need to apply the chain rule
        # (s(x) = sqrt[v(x)] => s'(x) = 1/2 * v'(x) / sqrt[v(x)])
        dstd = 0.5 * dvar / std
        return (dmu[:, :, 0] + dstd)[0, :]

    if startpoints is None:
        startpoints = []
        startpoints.append(compute_incumbent(model)[0])

    x_opt = np.zeros([len(startpoints), X_lower.shape[0]])
    fval = np.zeros([len(startpoints)])
    for i, startpoint in enumerate(startpoints):
        if with_gradients:
            res = optimize.fmin_l_bfgs_b(f, startpoint, df,
                                         bounds=list(zip(X_lower, X_upper)))
            x_opt[i] = res[0]
            fval[i] = res[1]
        else:
            res = optimize.minimize(f, startpoint,
                                    bounds=list(zip(X_lower, X_upper)),
                                    method="L-BFGS-B")
            x_opt[i] = res["x"]
            fval[i] = res["fun"]

    # Return the point with the lowest function value
    best = np.argmin(fval)
    return x_opt[best], fval[best]
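# A minimal check of the chain-rule identity used in df() above
# (s(x) = sqrt(v(x))  =>  s'(x) = 0.5 * v'(x) / sqrt(v(x))), using a toy
# variance function; the quadratic v(x) below is an illustrative assumption,
# not part of the original model.
import numpy as np

def v(x):
    return 1.0 + x ** 2          # toy predictive variance

def dv(x):
    return 2.0 * x               # its analytic derivative

x = 0.7
analytic = 0.5 * dv(x) / np.sqrt(v(x))
eps = 1e-6
numeric = (np.sqrt(v(x + eps)) - np.sqrt(v(x - eps))) / (2 * eps)
assert abs(analytic - numeric) < 1e-6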
def update_Mstep(self, scores, eta):

    def F(x):
        arg = x[0] + x[1] * scores
        func = arg * insum(eta.estim[:, 1:], 1) - nplog(1 + np.exp(arg))
        f = -1. * func.sum()
        if np.isnan(f) or np.isinf(f):
            return np.inf
        else:
            return f

    def Fprime(x):
        arg = x[0] + x[1] * scores
        df1 = insum(eta.estim[:, 1:], 1) - logistic(-arg)
        df2 = df1 * scores
        Df = -1. * np.array([df1.sum(), df2.sum()])
        if np.isnan(Df).any() or np.isinf(Df).any():
            return np.inf
        else:
            return Df

    bounds = [(None, 0), (None, None)]
    xo = self.estim.copy()
    solution = opt.fmin_l_bfgs_b(F, xo, fprime=Fprime, bounds=bounds, disp=0)
    self.estim = solution[0]

    if np.isnan(self.estim).any():
        print "Nan in Beta"
        raise ValueError
    if np.isinf(self.estim).any():
        print "Inf in Beta"
        raise ValueError
def HWT(x, m, m2, forecast, alpha=None, gamma=None, delta=None,
        initial_values_optimization=[0.1, 0.2, 0.2]):
    Y = x[:]
    test_series = []

    if alpha is None or gamma is None or delta is None:
        boundaries = [(0, 1), (0, 1), (0, 1)]
        train_series = Y[:-m2 * 1]
        test_series = Y[-m2 * 1:]
        # print train_series
        # print test_series
        Y = train_series
        func = RMSE
        parameters = fmin_l_bfgs_b(func, x0=initial_values_optimization,
                                   args=(train_series, (m, m2), test_series),
                                   bounds=boundaries, approx_grad=True,
                                   factr=10 ** 3)
        alpha, gamma, delta = parameters[0]

    a = [sum(Y[0:m]) / float(m)]
    s = [Y[i] / a[0] for i in range(m)]
    s2 = [Y[i] / a[0] for i in range(0, m2, m)]
    y = [a[0] + s[0] + s2[0]]

    for i in range(len(Y) + forecast + len(test_series)):
        if i >= len(Y):
            Y.append(a[-1] + s[-m] + s2[-m2])
        a.append(alpha * (Y[i] - s2[i] - s[i]) + (1 - alpha) * (a[i]))
        s.append(gamma * (Y[i] - a[i] - s2[i]) + (1 - gamma) * s[i])
        s2.append(delta * (Y[i] - a[i] - s[i]) + (1 - delta) * s2[i])
        y.append(a[i + 1] + s[i + 1] + s2[i + 1])

    return (Y[-forecast:], (alpha, gamma, delta), y[:-forecast],
            deque(a), deque(s), deque(s2), deque(Y))
def main():
    # featureParams = np.genfromtxt('model/feature-params.txt')
    # transitiveParams = np.genfromtxt('model/transition-params.txt')
    # computeLogLikelihoodTestSet(featureParams, transitiveParams)
    # computeMessagePassing(feature, cliquePotential(feature))

    dataSize = [50, 100, 150, 200, 250, 300, 350, 400]
    # gradientFunctionFeatureParam(featureParams, transitiveParams, 50)
    # gradientFunctionTransitiveParam(featureParams, transitiveParams, 50)

    global N
    training_time = list()
    for n in dataSize:
        x0 = np.zeros(10*321 + 10*10)
        start = time.clock()
        N = n
        sol = optimize.fmin_l_bfgs_b(computeLogLikelihood, x0,
                                     fprime=derivativeFunctions, args=())
        t = (time.clock() - start) * 1000
        print(str(N) + " training data: " + str(t))
        training_time.append(t)

        featureParams = np.reshape(sol[0][:10*321], (10, 321))
        transitiveParams = np.reshape(sol[0][10*321:], (10, 10))
        computeModelAccuracy(featureParams, transitiveParams)
        computeLogLikelihoodTestSet(featureParams, transitiveParams)
def test_l_bfgs_b_bounds(self):
    """ L-BFGS-B with bounds """
    x, f, d = optimize.fmin_l_bfgs_b(self.fun, [0, -1],
                                     fprime=self.jac,
                                     bounds=self.bounds)
    assert_(d['warnflag'] == 0, d['task'])
    assert_allclose(x, self.solution, atol=1e-6)
def optimise(self, theta=np.array([1., 1., 1.]),
             bounds=[(1e-8, None), (1e-8, None), (1e-8, None)],
             messages=False):
    res = fmin_l_bfgs_b(self.Neg_LML_Grads, theta, epsilon=1e-08,
                        factr=1e7, bounds=bounds, maxiter=15000,
                        maxfun=1000, pgtol=1e-05, m=10)
    if messages == True:
        print res[2]
    return res[0], res[1]
def fit(self, X, Y, disp=False, maxiter=1000, pgtol=1e-5):
    self.X, self.Y = X, Y
    ps0 = self.get_params()
    x, f, d = fmin_l_bfgs_b(self.errf, ps0, fprime=self.errf_grad,
                            args=([X, Y],), factr=10, pgtol=pgtol,
                            disp=disp, maxiter=maxiter)
def dealias_region_based( radar, ref_vel_field=None, interval_splits=3, interval_limits=None, skip_between_rays=100, skip_along_ray=100, centered=True, nyquist_vel=None, check_nyquist_uniform=True, gatefilter=False, rays_wrap_around=None, keep_original=False, set_limits=True, vel_field=None, corr_vel_field=None, **kwargs): """ Dealias Doppler velocities using a region based algorithm. Performs Doppler velocity dealiasing by finding regions of similar velocities and unfolding and merging pairs of regions until all regions are unfolded. Unfolding and merging regions is accomplished by modeling the problem as a dynamic network reduction. Parameters ---------- radar : Radar Radar object containing Doppler velocities to dealias. ref_vel_field : str or None, optional Field in radar containing a reference velocity field used to anchor the unfolded velocities once the algorithm completes. Typically this field is created by simulating the radial velocities from wind data from an atmospheric sonding using :py:func:`pyart.util.simulated_vel_from_profile`. interval_splits : int, optional Number of segments to split the nyquist interval into when finding regions of similar velocity. More splits creates a larger number of initial regions which takes longer to process but may result in better dealiasing. The default value of 3 seems to be a good compromise between performance and artifact free dealiasing. This value is not used if the interval_limits parameter is not None. interval_limits : array like or None, optional Velocity limits used for finding regions of similar velocity. Should cover the entire nyquist interval. None, the default value, will split the Nyquist interval into interval_splits equal sized intervals. skip_between_rays, skip_along_ray : int, optional Maximum number of filtered gates to skip over when joining regions, gaps between region larger than this will not be connected. Parameters specify the maximum number of filtered gates between and along a ray. Set these parameters to 0 to disable unfolding across filtered gates. centered : bool, optional True to apply centering to each sweep after the dealiasing algorithm so that the average number of unfolding is near 0. False does not apply centering which may results in individual sweeps under or over folded by the nyquist interval. nyquist_velocity : array like or float, optional Nyquist velocity in unit identical to those stored in the radar's velocity field, either for each sweep or a single value which will be used for all sweeps. None will attempt to determine this value from the Radar object. check_nyquist_uniform : bool, optional True to check if the Nyquist velocities are uniform for all rays within a sweep, False will skip this check. This parameter is ignored when the nyquist_velocity parameter is not None. gatefilter : GateFilter, None or False, optional. A GateFilter instance which specified which gates should be ignored when performing de-aliasing. A value of None created this filter from the radar moments using any additional arguments by passing them to :py:func:`moment_based_gate_filter`. False, the default, disables filtering including all gates in the dealiasing. rays_wrap_around : bool or None, optional True when the rays at the beginning of the sweep and end of the sweep should be interpreted as connected when de-aliasing (PPI scans). False if they edges should not be interpreted as connected (other scan types). None will determine the correct value from the radar scan type. 
keep_original : bool, optional True to retain the original Doppler velocity values at gates where the dealiasing procedure fails or was not applied. False does not replacement and these gates will be masked in the corrected velocity field. set_limits : bool, optional True to set valid_min and valid_max elements in the returned dictionary. False will not set these dictionary elements. vel_field : str, optional Field in radar to use as the Doppler velocities during dealiasing. None will use the default field name from the Py-ART configuration file. corr_vel_field : str, optional Name to use for the dealiased Doppler velocity field metadata. None will use the default field name from the Py-ART configuration file. Returns ------- corr_vel : dict Field dictionary containing dealiased Doppler velocities. Dealiased array is stored under the 'data' key. """ # parse function parameters vel_field, corr_vel_field = _parse_fields(vel_field, corr_vel_field) gatefilter = _parse_gatefilter(gatefilter, radar, **kwargs) rays_wrap_around = _parse_rays_wrap_around(rays_wrap_around, radar) nyquist_vel = _parse_nyquist_vel(nyquist_vel, radar, check_nyquist_uniform) # parse ref_vel_field if ref_vel_field is None: ref_vdata = None else: ref_vdata = radar.fields[ref_vel_field]['data'] # exclude masked and invalid velocity gates gatefilter.exclude_masked(vel_field) gatefilter.exclude_invalid(vel_field) gfilter = gatefilter.gate_excluded # perform dealiasing vdata = radar.fields[vel_field]['data'].view(np.ndarray) data = vdata.copy() # dealiased velocities # loop over sweeps for nsweep, sweep_slice in enumerate(radar.iter_slice()): # extract sweep data sdata = vdata[sweep_slice].copy() # is a copy needed here? scorr = data[sweep_slice] sfilter = gfilter[sweep_slice] # find nyquist velocity and interval segmentation limits nyquist_interval = nyquist_vel[nsweep] * 2. if interval_limits is None: nvel = nyquist_vel[nsweep] valid_sdata = sdata[~sfilter] s_interval_limits = _find_sweep_interval_splits( nvel, interval_splits, valid_sdata, nsweep) else: s_interval_limits = interval_limits # find regions in original data labels, nfeatures = _find_regions(sdata, sfilter, s_interval_limits) # skip sweep if all gates are masked or only a single region if nfeatures < 2: continue bincount = np.bincount(labels.ravel()) num_masked_gates = bincount[0] region_sizes = bincount[1:] # find all edges between regions indices, edge_count, velos = _edge_sum_and_count( labels, num_masked_gates, sdata, rays_wrap_around, skip_between_rays, skip_along_ray) # no unfolding required if no edges exist between regions if len(edge_count) == 0: continue # find the number of folds in the regions region_tracker = _RegionTracker(region_sizes) edge_tracker = _EdgeTracker(indices, edge_count, velos, nyquist_interval, nfeatures+1) while True: if _combine_regions(region_tracker, edge_tracker): break # center sweep if requested, determine a global sweep unfold number # so that the average number of gate folds is zero. 
if centered: gates_dealiased = region_sizes.sum() total_folds = np.sum( region_sizes * region_tracker.unwrap_number[1:]) sweep_offset = int(round(float(total_folds) / gates_dealiased)) if sweep_offset != 0: region_tracker.unwrap_number -= sweep_offset # dealias the data using the fold numbers nwrap = np.take(region_tracker.unwrap_number, labels) scorr += nwrap * nyquist_interval # anchor unfolded velocities against reference velocity if ref_vdata is not None: sref = ref_vdata[sweep_slice] gfold = (sref-scorr).mean()/nyquist_interval gfold = round(gfold) # Anchor specific regions against reference velocity # Do this by constraining cost function due to difference # from reference velocity and to 2D continuity new_interval_limits = np.linspace(scorr.min(), scorr.max(), 10) labels_corr, nfeatures_corr = _find_regions( scorr, sfilter, new_interval_limits) # If we only have one region, just adjust the whole sweep by # x nyquist intervals if nfeatures_corr < 2: scorr = scorr + gfold * nyquist_interval else: bounds_list = [(x, y) for (x, y) in zip( -6*np.ones(nfeatures_corr), 5*np.ones(nfeatures_corr))] scorr_means = np.zeros(nfeatures_corr) sref_means = np.zeros(nfeatures_corr) for reg in range(1, nfeatures_corr+1): scorr_means[reg-1] = np.ma.mean(scorr[labels_corr == reg]) sref_means[reg-1] = np.ma.mean(sref[labels_corr == reg]) def cost_function(x): return _cost_function(x, scorr_means, sref_means, nyquist_interval, nfeatures_corr) def gradient(x): return _gradient(x, scorr_means, sref_means, nyquist_interval, nfeatures_corr) nyq_adjustments = fmin_l_bfgs_b( cost_function, gfold*np.ones((nfeatures_corr)), disp=True, fprime=gradient, bounds=bounds_list, maxiter=200, pgtol=nyquist_interval) i = 0 for reg in range(1, nfeatures_corr): scorr[labels == reg] += (nyquist_interval * np.round(nyq_adjustments[0][i])) i = i + 1 # fill_value from the velocity dictionary if present fill_value = radar.fields[vel_field].get('_FillValue', get_fillvalue()) # mask filtered gates if np.any(gfilter): data = np.ma.array(data, mask=gfilter, fill_value=fill_value) # restore original values where dealiasing not applied if keep_original: data[gfilter] = radar.fields[vel_field]['data'][gfilter] # return field dictionary containing dealiased Doppler velocities corr_vel = get_metadata(corr_vel_field) corr_vel['data'] = data corr_vel['_FillValue'] = fill_value if set_limits: # set valid_min and valid_max in corr_vel _set_limits(data, nyquist_vel, corr_vel) return corr_vel
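# A minimal usage sketch for dealias_region_based() above, assuming a Py-ART
# Radar object; the file name, Nyquist velocity, and field names are
# illustrative placeholders, not values taken from the original code.
import pyart

radar = pyart.io.read('example_radar_volume.nc')        # hypothetical file
corr_vel = dealias_region_based(radar, vel_field='velocity',
                                nyquist_vel=13.0, centered=True)
radar.add_field('corrected_velocity', corr_vel, replace_existing=True)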
                            activations_G, x)
        cache.grads = grads
        return J

    def df(x):
        assert cache.grads is not None
        grads = cache.grads
        cache.grads = None
        # fmin_l_bfgs_b only handles 1D arrays
        grads = grads.flatten()
        # fmin_l_bfgs_b needs this to be float64 for some undocumented weird reason
        grads = grads.astype(np.float64)
        return grads

    # It only occurred to me mid way through this project that I would not be able to use
    # Keras's built in optimizers, as the loss function needs to be supplied explicitly,
    # hence why using scipy's L-BFGS optimizer here.
    # The optimizer will find pixel values that lower the scalar output of the cost function 'f'
    generated_image, min_val, info = fmin_l_bfgs_b(f, generated_image.flatten(),
                                                   fprime=df, maxfun=40)
    generated_image = generated_image.reshape(input_shape)

    image_utils.save_image('output/output_%d.png' % (time.time()), generated_image)
    #plt.imshow(image_utils.convert(generated_image))

    plt.plot(np.arange(0, len(J_history)), J_history, label='J')
def fit(self, X, y, sample_weight=None): if sample_weight is not None: sample_weight = np.array(sample_weight) check_consistent_length(y, sample_weight) else: sample_weight = np.ones_like(y) xStd = DescrStatsW(X, weights=sample_weight, ddof=1).std X = X / xStd n_features = X.shape[1] if self.fit_intercept: n_parameters = n_features + 1 else: n_parameters = n_features parameters = np.zeros(n_parameters) if self.lower_bound is None: self.lower_bound = np.full([n_parameters], -np.inf) if self.upper_bound is None: self.upper_bound = np.full([n_parameters], np.inf) bounds = np.zeros([n_parameters, 2]) for i in range(0, n_parameters): bounds[i, 0] = self.lower_bound[i] bounds[i, 1] = self.upper_bound[i] if i < n_features: bounds[i, 0] = bounds[i, 0] * xStd[i] bounds[i, 1] = bounds[i, 1] * xStd[i] else: bounds[i, 0] = bounds[i, 0] bounds[i, 1] = bounds[i, 1] try: parameters, f, dict_ = optimize.fmin_l_bfgs_b( _logistic_loss_and_gradient, parameters, args=(X, y, self.alpha, sample_weight, xStd, self.standardization), maxiter=self.max_iter, tol=self.tol, bounds=bounds, iprint=0) except TypeError: parameters, f, dict_ = optimize.fmin_l_bfgs_b( _logistic_loss_and_gradient, parameters, args=(X, y, self.alpha, sample_weight, xStd, self.standardization), bounds=bounds) self.n_iter_ = dict_.get('nit', None) if self.fit_intercept: self.intercept_ = parameters[-1] else: self.intercept_ = 0.0 self.coef_ = parameters[:n_features] / xStd return self # Used for testing # X = np.array([[1.0,2.0],[-1.5,2.1],[-2.1,-2.0]]) # y = np.array([1.0, 1.0, 0.0]) # lor = LogisticRegression(100, 0.5, [-np.inf, -np.inf, -np.inf], [np.inf, np.inf, np.inf], True, 1e-6, False) # lor.fit(X, y) # print("coef: %s intercept: %s" % (lor.coef_, lor.intercept_)) # Test _logistic_loss_and_gradient # sample_weight = np.ones_like(y) # xStd = np.std(X, axis=0) # xStd = np.array([1, 1]) # (loss, grad) = _logistic_loss_and_gradient(np.array([1.0, 2.0]), X / xStd, y, 0.0, sample_weight, xStd, True) # print("loss: %f, grad: %s" % (loss, grad))
def estimate_activity(self, attenuation=None, psf=None, iterations=DEFAULT_ITERATIONS, subset_size=DEFAULT_SUBSET_SIZE, subset_mode='random', method="EM", m=32, factr=0.01, pgtol=1e-16, maxfun=10000, smoothing=0.0, activity=None): progress_bar = ProgressBar() progress_bar.set_percentage(0.1) if activity == None: activity = ones( (self._p_n_pix_x, self._p_n_pix_y, self._p_n_pix_x), dtype=float32, order="F") if attenuation is None: attenuation = self._attenuation if attenuation is not None: if isinstance(attenuation, ndarray): attenuation = float32(attenuation) else: attenuation = float32(attenuation.data) if psf is None: psf = self._psf if method == "EM": print("Reconstruction method: EM") for i in range(iterations): # Subsets: if subset_size is None: subsets_array = None subset_size = self._p_gantry_angular_positions elif subset_size >= self._p_gantry_angular_positions: subsets_array = None subset_size = self._p_gantry_angular_positions else: subsets_array = self._subset_generator.new_subset( subset_mode, subset_size) if subsets_array is not None: proj = self.project(activity, attenuation=attenuation, psf=psf, subsets_array=subsets_array).data measurement = self._measurement[:, :, where(subsets_array )].reshape( (self._p_n_pix_x, self._p_n_pix_y, subset_size)) measurement = asfortranarray( ascontiguousarray(measurement)) P = (measurement + EPS) / (proj + EPS) norm = self.backproject(ones( (self._p_n_pix_x, self._p_n_pix_y, subset_size), dtype=float32, order="F"), attenuation=attenuation, psf=psf, subsets_array=subsets_array).data update = (self.backproject( P, attenuation=attenuation, psf=psf, subsets_array=subsets_array).data + EPS) / (norm + EPS) else: proj = self.project(activity, attenuation=attenuation, psf=psf).data P = (self._measurement + EPS) / (proj + EPS) norm = self.get_normalization() update = (self.backproject( P, attenuation=attenuation, psf=psf).data + EPS) / (norm + EPS) activity = activity * update # * self.get_mask().data progress_bar.set_percentage((i + 1) * 100.0 / iterations) # print "Iteration: %d max act: %f min act: %f max proj: %f min proj: %f max norm: %f min norm: %f"%(i, activity.max(), activity.min(), proj.max(), proj.min(), norm.data.max(), norm.data.min() ) progress_bar.set_percentage(100.0) elif method == "LBFGS": print("Reconstruction method: LBFGS-B") bounds = [(None, None)] * activity.size for i in range(0, activity.size): bounds[i] = (0, None) args = [activity.shape, smoothing] activity0 = float64(activity.reshape(activity.size)) # print "SIZE ACTIVITY0: ",activity0.shape activity_rec, f, d = optimize.fmin_l_bfgs_b( self.get_likelihood, activity0, fprime=self.get_gradient_activity, m=m, factr=factr, pgtol=pgtol, args=args, maxfun=maxfun, iprint=0, bounds=bounds) activity = float32(activity_rec.reshape(activity.shape)) progress_bar.set_percentage(100.0) else: raise UnexpectedParameter("Reconstruction method %s unknown" % method) return Image3D(activity)
def run(self): optimizer = self.optimizer p = self.problem f = p.f grad = p.grad # coerce return types f = lambda wt: numpy.float64(p.f(wt)) grad = lambda wt: numpy.array(map(numpy.float64, p.grad(wt))) # negate for minimization neg_f = lambda wt: -f(wt) neg_grad = lambda wt: -grad(wt) if not p.useGrad(): neg_grad = None if not p.useF(): neg_f = lambda wt: -p.__fDummy(wt) if optimizer == "bfgs": params = dict( filter(lambda (k, v): k in ["gtol", "epsilon", "maxiter"], self.optParams.iteritems())) print "starting optimization with %s... %s" % (optimizer, params) wt, f_opt, grad_opt, Hopt, func_calls, grad_calls, warn_flags = fmin_bfgs( neg_f, self.wt, fprime=neg_grad, full_output=True, **params) print "optimization done with %s..." % optimizer print "f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % ( -f_opt, func_calls, warn_flags) elif optimizer == "cg": params = dict( filter(lambda (k, v): k in ["gtol", "epsilon", "maxiter"], self.optParams.iteritems())) print "starting optimization with %s... %s" % (optimizer, params) wt, f_opt, func_calls, grad_calls, warn_flags = fmin_cg( neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params) print "optimization done with %s..." % optimizer print "f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % ( -f_opt, func_calls, warn_flags) elif optimizer == "ncg": params = dict( filter(lambda (k, v): k in ["avextol", "epsilon", "maxiter"], self.optParams.iteritems())) print "starting optimization with %s... %s" % (optimizer, params) wt, f_opt, func_calls, grad_calls, warn_flags = fmin_ncg( neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params) print "optimization done with %s..." % optimizer print "f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % ( -f_opt, func_calls, warn_flags) elif optimizer == "fmin": params = dict( filter(lambda (k, v): k in ["xtol", "ftol", "maxiter"], self.optParams.iteritems())) print "starting optimization with %s... %s" % (optimizer, params) wt = fmin(neg_f, self.wt, args=(), full_output=True, **params) print "optimization done with %s..." % optimizer elif optimizer == "powell": params = dict( filter(lambda (k, v): k in ["xtol", "ftol", "maxiter"], self.optParams.iteritems())) print "starting optimization with %s... %s" % (optimizer, params) wt = fmin_powell(neg_f, self.wt, args=(), full_output=True, **params) print "optimization done with %s..." % optimizer elif optimizer == 'l-bfgs-b': params = dict( filter( lambda (k, v): k in ["gtol", "epsilon", "maxiter", 'bounds'], self.optParams.iteritems())) print "starting optimization with %s... %s" % (optimizer, params) if 'bounds' in params: params['bounds'] = (params['bounds'], ) * len(self.wt) wt, f_opt, d = fmin_l_bfgs_b(neg_f, self.wt, fprime=neg_grad, **params) print "optimization done with %s..." % optimizer print "f-opt: %.16f\n" % (-f_opt) else: raise Exception("Unknown optimizer '%s'" % optimizer) return wt
def next(self, grid, values, durations, candidates, pending, complete): # Don't bother using fancy GP stuff at first. if complete.shape[0] < 2: return int(candidates[0]) # Perform the real initialization. if self.D == -1: self._real_init(grid.shape[1], values[complete]) # Grab out the relevant sets. comp = grid[complete,:] cand = grid[candidates,:] pend = grid[pending,:] vals = values[complete] numcand = cand.shape[0] # Spray a set of candidates around the min so far best_comp = np.argmin(vals) cand2 = np.vstack((np.random.randn(10,comp.shape[1])*0.001 + comp[best_comp,:], cand)) if self.mcmc_iters > 0: # Possibly burn in. if self.needs_burnin: for mcmc_iter in xrange(self.burnin): self.sample_hypers(comp, vals) log("BURN %d/%d] mean: %.2f amp: %.2f " "noise: %.4f min_ls: %.4f max_ls: %.4f" % (mcmc_iter+1, self.burnin, self.mean, np.sqrt(self.amp2), self.noise, np.min(self.ls), np.max(self.ls))) self.needs_burnin = False # Sample from hyperparameters. # Adjust the candidates to hit ei peaks self.hyper_samples = [] for mcmc_iter in xrange(self.mcmc_iters): self.sample_hypers(comp, vals) log("%d/%d] mean: %.2f amp: %.2f noise: %.4f " "min_ls: %.4f max_ls: %.4f" % (mcmc_iter+1, self.mcmc_iters, self.mean, np.sqrt(self.amp2), self.noise, np.min(self.ls), np.max(self.ls))) self.dump_hypers() b = []# optimization bounds for i in xrange(0, cand.shape[1]): b.append((0, 1)) overall_ei = self.ei_over_hypers(comp,pend,cand2,vals) inds = np.argsort(np.mean(overall_ei,axis=1))[-self.grid_subset:] cand2 = cand2[inds,:] # Optimize each point in parallel if self.use_multiprocessing: pool = multiprocessing.Pool(self.grid_subset) results = [pool.apply_async(optimize_pt,args=( c,b,comp,pend,vals,copy.copy(self))) for c in cand2] for res in results: cand = np.vstack((cand, res.get(1e8))) pool.close() else: # This is old code to optimize each point in parallel. for i in xrange(0, cand2.shape[0]): log("Optimizing candidate %d/%d" % (i+1, cand2.shape[0])) #self.check_grad_ei(cand2[i,:].flatten(), comp, pend, vals) ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei_over_hypers, cand2[i,:].flatten(), args=(comp,pend,vals), bounds=b, disp=0) cand2[i,:] = ret[0] cand = np.vstack((cand, cand2)) overall_ei = self.ei_over_hypers(comp,pend,cand,vals) best_cand = np.argmax(np.mean(overall_ei, axis=1)) if (best_cand >= numcand): return (int(numcand), cand[best_cand,:]) return int(candidates[best_cand]) else: # Optimize hyperparameters self.optimize_hypers(comp, vals) log("mean: %.2f amp: %.2f noise: %.4f " "min_ls: %.4f max_ls: %.4f" % (self.mean, np.sqrt(self.amp2), self.noise, np.min(self.ls), np.max(self.ls))) # Optimize over EI b = []# optimization bounds for i in xrange(0, cand.shape[1]): b.append((0, 1)) for i in xrange(0, cand2.shape[0]): ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei, cand2[i,:].flatten(), args=(comp,vals,True), bounds=b, disp=0) cand2[i,:] = ret[0] cand = np.vstack((cand, cand2)) ei = self.compute_ei(comp, pend, cand, vals) best_cand = np.argmax(ei) if (best_cand >= numcand): return (int(numcand), cand[best_cand,:]) return int(candidates[best_cand])
def download(): K.clear_session() height = 224 width = 224 global img_array_2 global org_array_2 content = K.variable(org_array_2) style = K.variable(img_array_2) combined = K.placeholder((1, height, width, 3)) input_tensor = K.concatenate([content, style, combined], axis=0) model = VGG16(input_tensor=input_tensor, weights='imagenet', include_top=False) #extract layers layers = dict([(layer.name, layer.output) for layer in model.layers]) content_weight = 0.025 style_weight = 1 total_variation_weight = 1.0 loss = K.variable(0.) def content_loss(content, combination): return K.sum(K.square(combination - content)) layer_feature = layers['block2_conv2'] content_feature = layer_feature[0, :, :, :] combined_feature = layer_feature[2, :, :, :] loss += content_weight * content_loss(content_feature, combined_feature) def gram_matrix(x): flatten = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1))) gram_m = K.dot(flatten, K.transpose(flatten)) return gram_m def style_loss(style, combination): style = gram_matrix(layer_feature[1, :, :, :]) combination = gram_matrix(layer_feature[2, :, :, :]) M = height * width return (K.sum(K.square(combination - style))) / (4 * 3 * 3 * M**2) feature_layers = [ 'block1_conv2', 'block2_conv2', 'block3_conv3', 'block4_conv3', 'block5_conv3' ] for layer_name in feature_layers: layer_features = layers[layer_name] style_features = layer_features[1, :, :, :] combination_features = layer_features[2, :, :, :] sl = style_loss(style_features, combination_features) loss += (style_weight / len(feature_layers)) * sl def total_variation_loss(x): a = K.square(x[:, :height - 1, :width - 1, :] - x[:, 1:, :width - 1, :]) b = K.square(x[:, :height - 1, :width - 1, :] - x[:, :height - 1, 1:, :]) return K.sum(K.pow(a + b, 1.25)) loss += total_variation_weight * total_variation_loss(combined) grads = K.gradients(loss, combined)[0] grads = K.l2_normalize(grads) outputs = [loss] outputs.append(grads) f_outputs = K.function([combined], outputs) def loss_eval(m): m = m.reshape(1, height, width, 3) loss_, grad_ = f_outputs([m]) grad_ = grad_.flatten().astype('float64') return loss_, grad_ import time x = np.random.uniform(0, 255, (1, height, width, 3)) - 128. #loss_, grad_ = loss_eval(x) iterations = 10 for i in range(iterations): print('Start of iteration', i) start_time = time.time() x, min_val, info = fmin_l_bfgs_b(loss_eval, x0=x.flatten(), maxfun=20) print('Current loss value:', min_val) end_time = time.time() print('Iteration %d completed in %ds' % (i, end_time - start_time)) x2 = np.reshape(x, (224, 224, 3)) x2 = x2[:, :, ::-1] x2[:, :, 0] += 103.939 x2[:, :, 1] += 116.779 x2[:, :, 2] += 123.68 x2 = np.clip(x2, 0, 255).astype('uint8') img = Image.fromarray(x2) # convert numpy array to PIL Image # img = Image.fromarray(arr.astype('uint8')) # create file-object in memory file_object = io.BytesIO() # write PNG in file-object img.save(file_object, 'PNG') # move to beginning of file so `send_file()` it will read from start file_object.seek(0) img_array_2 = np.array([[1]]) org_array_2 = np.array([[1]]) # return return send_file(file_object, as_attachment=True, cache_timeout=0, attachment_filename='result.png')
def get_dd_wind_field(Grids, u_init, v_init, w_init, vel_name=None, refl_field=None, u_back=None, v_back=None, z_back=None, frz=4500.0, Co=1.0, Cm=1500.0, Cx=0.0, Cy=0.0, Cz=0.0, Cb=0.0, Cv=0.0, Cmod=0.0, Ut=None, Vt=None, filt_iterations=2, mask_outside_opt=False, weights_obs=None, weights_model=None, weights_bg=None, max_iterations=200, mask_w_outside_opt=True, filter_window=9, filter_order=4, min_bca=30.0, max_bca=150.0, upper_bc=True, model_fields=None, output_cost_functions=True): """ This function takes in a list of Py-ART Grid objects and derives a wind field. Every Py-ART Grid in Grids must have the same grid specification. In order for the model data constraint to be used, the model data must be added as a field to at least one of the grids in Grids. This involves interpolating the model data to the Grids' coordinates. There are helper functions for this for WRF and HRRR data in :py:func:`pydda.constraints`: :py:func:`make_constraint_from_wrf` :py:func:`add_hrrr_constraint_to_grid` Parameters ========== Grids: list of Py-ART Grids The list of Py-ART grids to take in corresponding to each radar. All grids must have the same shape, x coordinates, y coordinates and z coordinates. u_init: 3D ndarray The intial guess for the zonal wind field, input as a 3D array with the same shape as the fields in Grids. v_init: 3D ndarray The intial guess for the meridional wind field, input as a 3D array with the same shape as the fields in Grids. w_init: 3D ndarray The intial guess for the vertical wind field, input as a 3D array with the same shape as the fields in Grids. vel_name: string Name of radial velocity field. Setting to None will have PyDDA attempt to automatically detect the velocity field name. refl_field: string Name of reflectivity field. Setting to None will have PyDDA attempt to automatically detect the reflectivity field name. u_back: 1D array Background zonal wind field from a sounding as a function of height. This should be given in the sounding's vertical coordinates. v_back: 1D array Background meridional wind field from a sounding as a function of height. This should be given in the sounding's vertical coordinates. z_back: 1D array Heights corresponding to background wind field levels in meters. This is given in the sounding's original coordinates. frz: float Freezing level used for fall speed calculation in meters. Co: float Weight for cost function related to observed radial velocities. Cm: float Weight for cost function related to the mass continuity equation. Cx: float Weight for cost function related to smoothness in x direction Cy: float Weight for cost function related to smoothness in y direction Cz: float Weight for cost function related to smoothness in z direction Cv: float Weight for cost function related to vertical vorticity equation. Cmod: float Weight for cost function related to custom constraints. weights_obs: list of floating point arrays or None List of weights for each point in grid from each radar in Grids. Set to None to let PyDDA determine this automatically. weights_model: list of floating point arrays or None List of weights for each point in grid from each custom field in model_fields. Set to None to let PyDDA determine this automatically. weights_bg: list of floating point arrays or None List of weights for each point in grid from the sounding. Set to None to let PyDDA determine this automatically. Ut: float Prescribed storm motion in zonal direction. This is only needed if Cv is not zero. Vt: float Prescribed storm motion in meridional direction. 
This is only needed if Cv is not zero. filt_iterations: int If this is greater than 0, PyDDA will run a low pass filter on the retrieved wind field and then do the optimization step for filt_iterations iterations. Set to 0 to disable the low pass filter. max_outside_opt: bool If set to true, wind values outside the multiple doppler lobes will be masked, i.e. if less than 2 radars provide coverage for a given point. max_iterations: int The maximum number of iterations to run the optimization loop for. max_w_outside_opt: bool If set to true, vertical winds outside the multiple doppler lobes will be masked, i.e. if less than 2 radars provide coverage for a given point. filter_window: int Window size to use for the low pass filter. A larger window will increase the number of points factored into the polynomial fit for the filter, and hence will increase the smoothness. filter_order: int The order of the polynomial to use for the low pass filter. Higher order polynomials allow for the retention of smaller scale features but may also not remove enough noise. min_bca: float Minimum beam crossing angle in degrees between two radars. 30.0 is the typical value used in many publications. max_bca: float Minimum beam crossing angle in degrees between two radars. 150.0 is the typical value used in many publications. upper_bc: bool Set this to true to enforce w = 0 at the top of the atmosphere. This is commonly called the impermeability condition. model_fields: list of strings The list of fields in the first grid in Grids that contain the custom data interpolated to the Grid's grid specification. Helper functions to create such gridded fields for HRRR and NetCDF WRF data exist in ::pydda.constraints::. PyDDA will look for fields named U_(model field name), V_(model field name), and W_(model field name). For example, if you have U_hrrr, V_hrrr, and W_hrrr, then specify ["hrrr"] into model_fields. output_cost_functions: bool Set to True to output the value of each cost function every 10 iterations. Returns ======= new_grid_list: list A list of Py-ART grids containing the derived wind fields. These fields are displayable by the visualization module. 
""" num_evaluations = 0 # We have to have a prescribed storm motion for vorticity constraint if (Ut is None or Vt is None): if (Cv != 0.0): raise ValueError(('Ut and Vt cannot be None if vertical ' + 'vorticity constraint is enabled!')) if not isinstance(Grids, list): raise ValueError('Grids has to be a list!') # Ensure that all Grids are on the same coordinate system prev_grid = Grids[0] for g in Grids: if not np.allclose(g.x['data'], prev_grid.x['data'], atol=10): raise ValueError('Grids do not have equal x coordinates!') if not np.allclose(g.y['data'], prev_grid.y['data'], atol=10): raise ValueError('Grids do not have equal y coordinates!') if not np.allclose(g.z['data'], prev_grid.z['data'], atol=10): raise ValueError('Grids do not have equal z coordinates!') if not g.origin_latitude['data'] == prev_grid.origin_latitude['data']: raise ValueError(("Grids have unequal origin lat/lons!")) prev_grid = g # Disable background constraint if none provided if (u_back is None or v_back is None): u_back2 = np.zeros(u_init.shape[0]) v_back2 = np.zeros(v_init.shape[0]) C8 = 0.0 else: # Interpolate sounding to radar grid print('Interpolating sounding to radar grid') u_interp = interp1d(z_back, u_back, bounds_error=False) v_interp = interp1d(z_back, v_back, bounds_error=False) u_back2 = u_interp(Grids[0].z['data']) v_back2 = v_interp(Grids[0].z['data']) print('Interpolated U field:') print(u_back2) print('Interpolated V field:') print(v_back2) print('Grid levels:') print(Grids[0].z['data']) # Parse names of velocity field if refl_field is None: refl_field = pyart.config.get_field_name('reflectivity') # Parse names of velocity field if vel_name is None: vel_name = pyart.config.get_field_name('corrected_velocity') winds = np.stack([u_init, v_init, w_init]) wts = [] vrs = [] azs = [] els = [] # Set up wind fields and weights from each radar weights = np.zeros( (len(Grids), u_init.shape[0], u_init.shape[1], u_init.shape[2])) bg_weights = np.zeros(v_init.shape) if (model_fields is not None): mod_weights = np.ones((len(model_fields), u_init.shape[0], u_init.shape[1], u_init.shape[2])) else: mod_weights = np.zeros( (1, u_init.shape[0], u_init.shape[1], u_init.shape[2])) if (model_fields is None): if (Cmod != 0.0): raise ValueError( 'Cmod must be zero if model fields are not specified!') bca = np.zeros((len(Grids), len(Grids), u_init.shape[1], u_init.shape[2])) M = np.zeros(len(Grids)) sum_Vr = np.zeros(len(Grids)) for i in range(len(Grids)): wts.append( cost_functions.calculate_fall_speed(Grids[i], refl_field=refl_field)) add_azimuth_as_field(Grids[i], dz_name=refl_field) add_elevation_as_field(Grids[i], dz_name=refl_field) vrs.append(Grids[i].fields[vel_name]['data']) azs.append(Grids[i].fields['AZ']['data'] * np.pi / 180) els.append(Grids[i].fields['EL']['data'] * np.pi / 180) if (len(Grids) > 1): for i in range(len(Grids)): for j in range(i + 1, len(Grids)): print(("Calculating weights for radars " + str(i) + " and " + str(j))) bca[i, j] = get_bca(Grids[i].radar_longitude['data'], Grids[i].radar_latitude['data'], Grids[j].radar_longitude['data'], Grids[j].radar_latitude['data'], Grids[i].point_x['data'][0], Grids[i].point_y['data'][0], Grids[i].get_projparams()) for k in range(vrs[i].shape[0]): if (weights_obs is None): cur_array = weights[i, k] cur_array[np.logical_and( ~vrs[i][k].mask, np.logical_and( bca[i, j] >= math.radians(min_bca), bca[i, j] <= math.radians(max_bca)))] += 1 weights[i, k] = cur_array else: weights[i, k] = weights_obs[i][k, :, :] if (weights_obs is None): cur_array = weights[j, k] 
cur_array[np.logical_and( ~vrs[j][k].mask, np.logical_and( bca[i, j] >= math.radians(min_bca), bca[i, j] <= math.radians(max_bca)))] += 1 weights[j, k] = cur_array else: weights[j, k] = weights_obs[j][k, :, :] if (weights_bg is None): cur_array = bg_weights[k] cur_array[np.logical_or( bca[i, j] >= math.radians(min_bca), bca[i, j] <= math.radians(max_bca))] = 1 cur_array[vrs[i][k].mask] = 0 bg_weights[k] = cur_array else: bg_weights[k] = weights_bg[k] print("Calculating weights for models...") coverage_grade = weights.sum(axis=0) coverage_grade = coverage_grade / coverage_grade.max() # Weigh in model input more when we have no coverage # Model only weighs 1/(# of grids + 1) when there is full # Coverage if (model_fields is not None): if (weights_model is None): for i in range(len(model_fields)): mod_weights[i] = 1 - (coverage_grade / (len(Grids) + 1)) else: for i in range(len(model_fields)): mod_weights[i] = weights_model[i] else: weights[0] = np.where(~vrs[0].mask, 1, 0) bg_weights = np.where(~vrs[0].mask, 0, 1) weights[weights > 0] = 1 sum_Vr = np.sum(np.square(vrs * weights)) rmsVr = np.sum(sum_Vr) / np.sum(weights) del bca grid_shape = u_init.shape winds = winds.flatten() ndims = len(winds) print(("Starting solver ")) dx = np.diff(Grids[0].x['data'], axis=0)[0] dy = np.diff(Grids[0].y['data'], axis=0)[0] dz = np.diff(Grids[0].z['data'], axis=0)[0] print('rmsVR = ' + str(rmsVr)) print('Total points:' + str(weights.sum())) z = Grids[0].point_z['data'] the_time = time.time() bt = time.time() # First pass - no filter wcurr = w_init wprev = 100 * np.ones(w_init.shape) wprevmax = 99 wcurrmax = w_init.max() iterations = 0 warnflag = 99999 coeff_max = np.max([Co, Cb, Cm, Cx, Cy, Cz, Cv]) bounds = [(-x, x) for x in 100 * np.ones(winds.shape)] u_model = [] v_model = [] w_model = [] if (model_fields is not None): for the_field in model_fields: u_field = ("U_" + the_field) v_field = ("V_" + the_field) w_field = ("W_" + the_field) u_model.append(Grids[0].fields[u_field]["data"]) v_model.append(Grids[0].fields[v_field]["data"]) w_model.append(Grids[0].fields[w_field]["data"]) while (iterations < max_iterations and (abs(wprevmax - wcurrmax) > 0.02)): wprevmax = wcurrmax winds = fmin_l_bfgs_b(J_function, winds, args=(vrs, azs, els, wts, u_back, v_back, u_model, v_model, w_model, Co, Cm, Cx, Cy, Cz, Cb, Cv, Cmod, Ut, Vt, grid_shape, dx, dy, dz, z, rmsVr, weights, bg_weights, mod_weights, upper_bc, False), maxiter=10, pgtol=1e-3, bounds=bounds, fprime=grad_J, disp=0, iprint=-1) if (output_cost_functions is True): J_function(winds[0], vrs, azs, els, wts, u_back, v_back, u_model, v_model, w_model, Co, Cm, Cx, Cy, Cz, Cb, Cv, Cmod, Ut, Vt, grid_shape, dx, dy, dz, z, rmsVr, weights, bg_weights, mod_weights, upper_bc, True) grad_J(winds[0], vrs, azs, els, wts, u_back, v_back, u_model, v_model, w_model, Co, Cm, Cx, Cy, Cz, Cb, Cv, Cmod, Ut, Vt, grid_shape, dx, dy, dz, z, rmsVr, weights, bg_weights, mod_weights, upper_bc, True) warnflag = winds[2]['warnflag'] winds = np.reshape(winds[0], (3, grid_shape[0], grid_shape[1], grid_shape[2])) iterations = iterations + 10 print('Iterations before filter: ' + str(iterations)) wcurrmax = winds[2].max() winds = np.stack([winds[0], winds[1], winds[2]]) winds = winds.flatten() if (filt_iterations > 0): print('Applying low pass filter to wind field...') winds = np.reshape(winds, (3, grid_shape[0], grid_shape[1], grid_shape[2])) winds[0] = savgol_filter(winds[0], filter_window, filter_order, axis=0) winds[0] = savgol_filter(winds[0], filter_window, filter_order, axis=1) winds[0] = 
savgol_filter(winds[0], filter_window, filter_order, axis=2) winds[1] = savgol_filter(winds[1], filter_window, filter_order, axis=0) winds[1] = savgol_filter(winds[1], filter_window, filter_order, axis=1) winds[1] = savgol_filter(winds[1], filter_window, filter_order, axis=2) winds[2] = savgol_filter(winds[2], filter_window, filter_order, axis=0) winds[2] = savgol_filter(winds[2], filter_window, filter_order, axis=1) winds[2] = savgol_filter(winds[2], filter_window, filter_order, axis=2) winds = np.stack([winds[0], winds[1], winds[2]]) winds = winds.flatten() iterations = 0 while (iterations < filt_iterations): winds = fmin_l_bfgs_b( J_function, winds, args=(vrs, azs, els, wts, u_back, v_back, u_model, v_model, w_model, Co, Cm, Cx, Cy, Cz, Cb, Cv, Cmod, Ut, Vt, grid_shape, dx, dy, dz, z, rmsVr, weights, bg_weights, mod_weights, upper_bc, False), maxiter=10, pgtol=1e-3, bounds=bounds, fprime=grad_J, disp=0, iprint=-1) warnflag = winds[2]['warnflag'] winds = np.reshape( winds[0], (3, grid_shape[0], grid_shape[1], grid_shape[2])) iterations = iterations + 1 print('Iterations after filter: ' + str(iterations)) winds = np.stack([winds[0], winds[1], winds[2]]) winds = winds.flatten() print("Done! Time = " + "{:2.1f}".format(time.time() - bt)) # Reshape the final wind field and build the output grids the_winds = np.reshape(winds, (3, grid_shape[0], grid_shape[1], grid_shape[2])) u = the_winds[0] v = the_winds[1] w = the_winds[2] where_mask = np.sum(weights, axis=0) + np.sum(mod_weights, axis=0) u = np.ma.array(u) w = np.ma.array(w) v = np.ma.array(v) if (mask_outside_opt is True): u = np.ma.masked_where(where_mask < 1, u) v = np.ma.masked_where(where_mask < 1, v) w = np.ma.masked_where(where_mask < 1, w) if (mask_w_outside_opt is True): w = np.ma.masked_where(where_mask < 1, w) u_field = deepcopy(Grids[0].fields[vel_name]) u_field['data'] = u u_field['standard_name'] = 'u_wind' u_field['long_name'] = 'zonal component of wind velocity' u_field['min_bca'] = min_bca u_field['max_bca'] = max_bca v_field = deepcopy(Grids[0].fields[vel_name]) v_field['data'] = v v_field['standard_name'] = 'v_wind' v_field['long_name'] = 'meridional component of wind velocity' v_field['min_bca'] = min_bca v_field['max_bca'] = max_bca w_field = deepcopy(Grids[0].fields[vel_name]) w_field['data'] = w w_field['standard_name'] = 'w_wind' w_field['long_name'] = 'vertical component of wind velocity' w_field['min_bca'] = min_bca w_field['max_bca'] = max_bca new_grid_list = [] for grid in Grids: temp_grid = deepcopy(grid) temp_grid.add_field('u', u_field, replace_existing=True) temp_grid.add_field('v', v_field, replace_existing=True) temp_grid.add_field('w', w_field, replace_existing=True) new_grid_list.append(temp_grid) return new_grid_list
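A minimal usage sketch for get_dd_wind_field, assuming two Py-ART grid files that share the same grid specification; the file names and the 'reflectivity' field name are placeholders, and the motionless first guess is only one possible initialization strategy.

import numpy as np
import pyart

# Hypothetical gridded radar files; both grids must be on identical coordinates.
grid_a = pyart.io.read_grid('radar_a_grid.nc')
grid_b = pyart.io.read_grid('radar_b_grid.nc')

# Motionless first guess with the same shape as the grid fields
# ('reflectivity' is an assumed field name here).
shape = grid_a.fields['reflectivity']['data'].shape
u_init = np.zeros(shape)
v_init = np.zeros(shape)
w_init = np.zeros(shape)

grids_out = get_dd_wind_field([grid_a, grid_b], u_init, v_init, w_init,
                              Co=1.0, Cm=1500.0, frz=4500.0,
                              mask_outside_opt=True)
u_retrieved = grids_out[0].fields['u']['data']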
def update_uv(X, z, uv_hat0, constants=None, b_hat_0=None, debug=False, max_iter=300, eps=None, solver_d='alternate', momentum=False, uv_constraint='separate', loss='l2', loss_params=dict(), verbose=0): """Learn d's in time domain. Parameters ---------- X : array, shape (n_trials, n_channels, n_times) The data for sparse coding z : array, shape (n_trials, n_atoms, n_times - n_times_atom + 1) Can also be a list of n_trials LIL-sparse matrices of shape (n_atoms, n_times - n_times_atom + 1) The code for which to learn the atoms uv_hat0 : array, shape (n_atoms, n_channels + n_times_atom) The initial atoms. constants : dict or None Dictionary of constants to accelerate the computation of the gradients. It should only be given for loss='l2' and should contain ztz and ztX. b_hat_0 : array, shape (n_atoms * (n_channels + n_times_atom)) Initial eigenvector used in power_iteration, used in warm start. debug : bool If True, return the cost at each iteration. momentum : bool If True, use an accelerated version of the proximal gradient descent. uv_constraint : str in {'joint', 'separate', 'box'} The kind of norm constraint on the atoms: If 'joint', the constraint is norm_2([u, v]) <= 1 If 'separate', the constraint is norm_2(u) <= 1 and norm_2(v) <= 1 If 'box', the constraint is norm_inf([u, v]) <= 1 solver_d : str in {'alternate', 'joint', 'l-bfgs'} The type of solver to update d: If 'alternate', the solver alternates between u and v If 'joint', the solver jointly optimizes uv with a line search If 'l-bfgs', the solver uses l-bfgs with box constraints loss : str in {'l2' | 'dtw' | 'whitening'} The data-fit loss. loss_params : dict Parameters of the loss verbose : int Verbosity level. Returns ------- uv_hat : array, shape (n_atoms, n_channels + n_times_atom) The atoms to learn from the data. 
""" n_trials, n_atoms, n_times_valid = get_z_shape(z) _, n_channels, n_times = X.shape if solver_d == 'l-bfgs': msg = "L-BFGS sovler only works with box constraints" assert uv_constraint == 'box', msg elif solver_d == 'alternate': msg = "alternate solver should be used with separate constraints" assert uv_constraint == 'separate', msg if loss == 'l2' and constants is None: constants = _get_d_update_constants(X, z) def objective(uv): if loss == 'l2': return compute_objective(D=uv, constants=constants) return compute_X_and_objective_multi(X, z, D_hat=uv, loss=loss, loss_params=loss_params, feasible_evaluation=False) if solver_d in ['joint', 'fista']: # use FISTA on joint [u, v], with an adaptive step size def grad(uv): return gradient_uv(uv=uv, X=X, z=z, constants=constants, loss=loss, loss_params=loss_params) def prox(uv): return prox_uv(uv, uv_constraint=uv_constraint, n_channels=n_channels) uv_hat, pobj = fista(objective, grad, prox, None, uv_hat0, max_iter, verbose=verbose, momentum=momentum, eps=eps, adaptive_step_size=True, debug=debug, name="Update uv") elif solver_d in ['alternate', 'alternate_adaptive']: # use FISTA on alternate u and v adaptive_step_size = (solver_d == 'alternate_adaptive') uv_hat = uv_hat0.copy() u_hat, v_hat = uv_hat[:, :n_channels], uv_hat[:, n_channels:] def prox(u): u /= np.maximum(1., np.linalg.norm(u, axis=1))[:, None] return u for jj in range(1): # ---------------- update u def obj(u): uv = np.c_[u, v_hat] return objective(uv) def grad_u(u): uv = np.c_[u, v_hat] grad_d = gradient_d(uv, X=X, z=z, constants=constants, loss=loss, loss_params=loss_params) return (grad_d * uv[:, None, n_channels:]).sum(axis=2) if adaptive_step_size: Lu = 1 else: Lu = compute_lipschitz(uv_hat, constants, 'u', b_hat_0) assert Lu > 0 u_hat, pobj = fista(obj, grad_u, prox, 0.99 / Lu, u_hat, max_iter, verbose=verbose, momentum=momentum, eps=eps, adaptive_step_size=adaptive_step_size, debug=debug, name="Update u") uv_hat = np.c_[u_hat, v_hat] # ---------------- update v def obj(v): uv = np.c_[u_hat, v] return objective(uv) def grad_v(v): uv = np.c_[u_hat, v] grad_d = gradient_d(uv, X=X, z=z, constants=constants, loss=loss, loss_params=loss_params) return (grad_d * uv[:, :n_channels, None]).sum(axis=1) if adaptive_step_size: Lv = 1 else: Lv = compute_lipschitz(uv_hat, constants, 'v', b_hat_0) assert Lv > 0 v_hat, pobj_v = fista(obj, grad_v, prox, 0.99 / Lv, v_hat, max_iter, momentum=momentum, eps=eps, adaptive_step_size=adaptive_step_size, verbose=verbose, debug=debug, name="Update v") uv_hat = np.c_[u_hat, v_hat] if debug: pobj.extend(pobj_v) elif solver_d == 'l-bfgs': # use L-BFGS on joint [u, v] with a box constraint (L_inf norm <= 1) def func(uv): uv = np.reshape(uv, uv_hat0.shape) return objective(uv) def grad(uv): return gradient_uv(uv, constants=constants, flatten=True) bounds = [(-1, 1) for idx in range(0, uv_hat0.size)] if debug: assert optimize.check_grad(func, grad, uv_hat0.ravel()) < 1e-5 pobj = [objective(uv_hat0)] uv_hat, _, _ = optimize.fmin_l_bfgs_b(func, x0=uv_hat0.ravel(), fprime=grad, bounds=bounds, factr=1e7) uv_hat = np.reshape(uv_hat, uv_hat0.shape) if debug: pobj.append(objective(uv_hat)) else: raise ValueError('Unknown solver_d: %s' % (solver_d, )) if debug: return uv_hat, pobj return uv_hat
tf_session = K.get_session() contentModel = VGG16(include_top=False, weights="imagenet", input_tensor=cImArr) styleModel = VGG16(include_top=False, weights="imagenet", input_tensor=sImArr) gModel = VGG16(include_top=False, weights="imagenet", input_tensor=gImPlaceholder) P = get_feature_reps(x=cImArr, layer_names=cLayerNames, model=contentModel)[0] As = get_feature_reps(x=sImArr, layer_names=sLayerNames, model=styleModel) ws = np.ones(len(sLayerNames)) / float(len(sLayerNames)) x_val = gIm0.flatten() start = time.time() xopt, f_val, info = fmin_l_bfgs_b( calculate_loss, x_val, fprime=get_grad, maxiter=args.iterations, disp=True, callback=checkpoint_callback, ) xOut = postprocess_array(xopt) xIm = save_original_size(xOut) print("Image saved") end = time.time() print("Time taken: {}".format(end - start))
def _fit_lbfgs(f, score, start_params, fargs, kwargs, disp=True, maxiter=100, callback=None, retall=False, full_output=True, hess=None): """ Parameters ---------- f : function Returns negative log likelihood given parameters. score : function Returns gradient of negative log likelihood with respect to params. Notes ----- Within the mle part of statsmodels, the log likelihood function and its gradient with respect to the parameters do not have notationally consistent sign. """ # Use unconstrained optimization by default. bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params)) kwargs.setdefault('iprint', 0) # Pass the following keyword argument names through to fmin_l_bfgs_b # if they are present in kwargs, otherwise use the fmin_l_bfgs_b # default values. names = ('m', 'pgtol', 'factr', 'maxfun', 'epsilon', 'approx_grad') extra_kwargs = dict((x, kwargs[x]) for x in names if x in kwargs) # Extract values for the options related to the gradient. approx_grad = kwargs.get('approx_grad', False) loglike_and_score = kwargs.get('loglike_and_score', None) epsilon = kwargs.get('epsilon', None) # The approx_grad flag has superpowers nullifying the score function arg. if approx_grad: score = None # Choose among three options for dealing with the gradient (the gradient # of a log likelihood function with respect to its parameters # is more specifically called the score in statistics terminology). # The first option is to use the finite-differences # approximation that is built into the fmin_l_bfgs_b optimizer. # The second option is to use the provided score function. # The third option is to use the score component of a provided # function that simultaneously evaluates the log likelihood and score. if epsilon and not approx_grad: raise ValueError('a finite-differences epsilon was provided ' 'even though we are not using approx_grad') if approx_grad and loglike_and_score: raise ValueError('gradient approximation was requested ' 'even though an analytic loglike_and_score function ' 'was given') if loglike_and_score: func = lambda p, *a: tuple(-x for x in loglike_and_score(p, *a)) elif score: func = f extra_kwargs['fprime'] = score elif approx_grad: func = f # Customize the fmin_l_bfgs_b call according to the scipy version. # Old scipy does not support maxiter and callback. scipy_version_curr = distutils.version.LooseVersion(scipy_version) scipy_version_12 = distutils.version.LooseVersion('0.12.0') if scipy_version_curr < scipy_version_12: retvals = optimize.fmin_l_bfgs_b(func, start_params, args=fargs, bounds=bounds, disp=disp, **extra_kwargs) else: retvals = optimize.fmin_l_bfgs_b(func, start_params, maxiter=maxiter, callback=callback, args=fargs, bounds=bounds, disp=disp, **extra_kwargs) if full_output: xopt, fopt, d = retvals # The warnflag is # 0 if converged # 1 if too many function evaluations or too many iterations # 2 if stopped for another reason, given in d['task'] warnflag = d['warnflag'] converged = (warnflag == 0) gopt = d['grad'] fcalls = d['funcalls'] retvals = { 'fopt': fopt, 'gopt': gopt, 'fcalls': fcalls, 'warnflag': warnflag, 'converged': converged } else: xopt = retvals[0] retvals = None return xopt, retvals
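The three gradient-handling options described in the comments above map directly onto three ways of calling fmin_l_bfgs_b; a small sketch with a hypothetical quadratic negative log likelihood:

import numpy as np
from scipy import optimize

def nll(p):
    # Toy negative log likelihood, minimized at p = 1.
    return 0.5 * np.sum((p - 1.0) ** 2)

def score_neg(p):
    # Gradient of the negative log likelihood.
    return p - 1.0

def nll_and_score(p):
    # Combined value-and-gradient form.
    return nll(p), score_neg(p)

x0 = np.zeros(3)
# 1) finite-difference gradient built into the optimizer
x1, _, _ = optimize.fmin_l_bfgs_b(nll, x0, approx_grad=True)
# 2) analytic gradient passed separately
x2, _, _ = optimize.fmin_l_bfgs_b(nll, x0, fprime=score_neg)
# 3) a single callable returning (value, gradient)
x3, _, _ = optimize.fmin_l_bfgs_b(nll_and_score, x0)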
def optimize_hypers(self, comp, vals): self.mean = np.mean(vals) diffs = vals - self.mean state = { } def jitter_chol(covmat): passed = False jitter = 1e-8 val = 0 while not passed: if (jitter > 100000): val = spla.cholesky(np.eye(covmat.shape[0])) break try: val = spla.cholesky(covmat + jitter*np.eye(covmat.shape[0]), lower=True) passed = True except ValueError: jitter = jitter*1.1 print "Covariance matrix not PSD, adding jitter:", jitter passed = False return val def memoize(amp2, noise, ls): if ( 'corr' not in state or state['amp2'] != amp2 or state['noise'] != noise or np.any(state['ls'] != ls)): # Get the correlation matrix (corr, grad_corr) = self.cov_func(ls, comp, None, grad=True) # Scale and add noise & jitter. covmat = (amp2 * (corr + 1e-6*np.eye(comp.shape[0])) + noise * np.eye(comp.shape[0])) # Memoize state['corr'] = corr state['grad_corr'] = grad_corr state['chol'] = jitter_chol(covmat) state['amp2'] = amp2 state['noise'] = noise state['ls'] = ls return (state['chol'], state['corr'], state['grad_corr']) def nlogprob(hypers): amp2 = np.exp(hypers[0]) noise = np.exp(hypers[1]) ls = np.exp(hypers[2:]) chol = memoize(amp2, noise, ls)[0] solve = spla.cho_solve((chol, True), diffs) lp = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(diffs, solve) return -lp def grad_nlogprob(hypers): amp2 = np.exp(hypers[0]) noise = np.exp(hypers[1]) ls = np.exp(hypers[2:]) chol, corr, grad_corr = memoize(amp2, noise, ls) solve = spla.cho_solve((chol, True), diffs) inv_cov = spla.cho_solve((chol, True), np.eye(chol.shape[0])) jacobian = np.outer(solve, solve) - inv_cov grad = np.zeros(self.D + 2) # Log amplitude gradient. grad[0] = 0.5 * np.trace(np.dot( jacobian, corr + 1e-6*np.eye(chol.shape[0]))) * amp2 # Log noise gradient. grad[1] = 0.5 * np.trace(np.dot( jacobian, np.eye(chol.shape[0]))) * noise # Log length scale gradients. for dd in xrange(self.D): grad[dd+2] = 1 * np.trace(np.dot( jacobian, -amp2*grad_corr[:,:,dd]*comp[:,dd][:,np.newaxis]/(np.exp(ls[dd]))))*np.exp(ls[dd]) # Roll in the prior variance. #grad -= 2*hypers/self.hyper_prior return -grad # Initial length scales. self.ls = np.ones(self.D) # Initial amplitude. self.amp2 = np.std(vals) # Initial observation noise. self.noise = 1e-3 hypers = np.zeros(self.ls.shape[0]+2) hypers[0] = np.log(self.amp2) hypers[1] = np.log(self.noise) hypers[2:] = np.log(self.ls) # Use a bounded bfgs just to prevent the length-scales and noise from # getting into regions that are numerically unstable b = [(-10,10),(-10,10)] for i in xrange(comp.shape[1]): b.append((-10,5)) hypers = spo.fmin_l_bfgs_b(nlogprob, hypers, grad_nlogprob, args=(), bounds=b, disp=0) #hypers = spo.fmin_bfgs(nlogprob, hypers, grad_nlogprob, maxiter=100) hypers = hypers[0] #hypers = spo.fmin_bfgs(nlogprob, hypers, grad_nlogprob, maxiter=100) self.amp2 = np.exp(hypers[0]) self.noise = np.exp(hypers[1]) self.ls = np.exp(hypers[2:])
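The jitter_chol helper above retries the Cholesky factorization with a growing diagonal "jitter" term until it succeeds; a stand-alone sketch of that idea (catching LinAlgError, which scipy.linalg.cholesky raises for a non-positive-definite matrix):

import numpy as np
import scipy.linalg as spla

def jitter_cholesky(covmat, start=1e-8, grow=1.1, max_jitter=1e5):
    """Lower Cholesky factor of covmat + jitter*I, growing jitter until it works."""
    jitter = start
    while jitter <= max_jitter:
        try:
            return spla.cholesky(covmat + jitter * np.eye(covmat.shape[0]),
                                 lower=True)
        except np.linalg.LinAlgError:
            jitter *= grow
    # Fall back to the identity if the matrix never becomes numerically PSD.
    return spla.cholesky(np.eye(covmat.shape[0]), lower=True)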
def update_d(X, z, D_hat0, constants=None, b_hat_0=None, debug=False, max_iter=300, eps=None, solver_d='fista', momentum=False, uv_constraint='joint', loss='l2', loss_params=dict(), verbose=0): """Learn d's in time domain. Parameters ---------- X : array, shape (n_trials, n_channels, n_times) The data for sparse coding z : array, shape (n_trials, n_atoms, n_times - n_times_atom + 1) Can also be a list of n_trials LIL-sparse matrix of shape (n_atoms, n_times - n_times_atom + 1) The code for which to learn the atoms D_hat0 : array, shape (n_atoms, n_channels, n_times_atom) The initial atoms. constants : dict or None Dictionary of constants to accelerate the computation of the gradients. It should only be given for loss='l2' and should contain ztz and ztX. b_hat_0 : array, shape (n_atoms * (n_channels + n_times_atom)) Init eigen-vector vector used in power_iteration, used in warm start. debug : bool If True, return the cost at each iteration. momentum : bool If True, use an accelerated version of the proximal gradient descent. solver_d : str in {'fista', 'l-bfgs'} The type of solver to update d: If 'fista', the solver optimize D with fista and line search If 'l-bfgs', the solver uses l-bfgs with box constraints loss : str in {'l2' | 'dtw' | 'whitening'} The data-fit loss_params : dict Parameters of the loss verbose : int Verbosity level. Returns ------- D_hat : array, shape (n_atoms, n_channels, n_times_atom) The atoms to learn from the data. """ n_trials, n_atoms, n_times_valid = get_z_shape(z) _, n_channels, n_times = X.shape if loss == 'l2' and constants is None: constants = _get_d_update_constants(X, z) def objective(D, full=False): if loss == 'l2': return compute_objective(D=D, constants=constants) return compute_X_and_objective_multi(X, z, D_hat=D, loss=loss, loss_params=loss_params) if solver_d == 'fista': # use FISTA on joint [u, v], with an adaptive step size def grad(D): return gradient_d(D=D, X=X, z=z, constants=constants, loss=loss, loss_params=loss_params) def prox(D): return prox_d(D) D_hat, _ = fista(objective, grad, prox, None, D_hat0, max_iter, verbose=verbose, momentum=momentum, eps=eps, adaptive_step_size=True, debug=debug, name="Update D") elif solver_d == 'l-bfgs': # use L-BFGS on joint [u, v] with a box constraint (L_inf norm <= 1) def func(D): D = np.reshape(D, D_hat0.shape) return objective(D) def grad(D): return gradient_d(D, constants=constants, flatten=True) bounds = [(-1, 1) for idx in range(0, D_hat0.size)] if debug: assert optimize.check_grad(func, grad, D_hat0.ravel()) < 1e-5 pobj = [objective(D_hat0)] D_hat, _, _ = optimize.fmin_l_bfgs_b(func, x0=D_hat0.ravel(), fprime=grad, bounds=bounds, factr=1e7) D_hat = np.reshape(D_hat, D_hat0.shape) if debug: pobj.append(objective(D_hat)) else: raise ValueError('Unknown solver_d: %s' % (solver_d, )) if debug: return D_hat, pobj return D_hat
L_test = L_input[-constant_test_set_size:, :] y_train = y[:-constant_test_set_size] y_test = y[-constant_test_set_size:] train_set_size = int(constant_test_set_size) print("Starting regression.") w_init = np.array(np.zeros(X.shape[1] + 1 + 36)) lambda_penalty = 0 (w_bundle, _, _) = spopt.fmin_l_bfgs_b(log_loss, w_init, fprime=log_loss_gradient, args=(X_train, L_train, y_train, lambda_penalty), maxiter=100) np.save('apa_' + action + '_' + dataset + '_' + model_instance + '_weights', w_bundle) w = w_bundle[1 + 36:] w_L = w_bundle[1:1 + 36] w_0 = w_bundle[0] print('w_0 = ' + str(w_0)) print('w_L = ' + str(w_L)) y_train_hat = np.ravel(compute_probability_one(X_train, L_train, w, w_L, w_0)) y_test_hat = np.ravel(compute_probability_one(X_test, L_test, w, w_L, w_0))
# run scipy-based optimization (L-BFGS) over the pixels of the generated image # so as to minimize the loss x = preprocess_image(base_image_path) for i in range(5): print('Start of iteration', i) start_time = time.time() # add a random jitter to the initial image. This will be reverted at decoding time random_jitter = (settings['jitter'] * 2) * (np.random.random( (3, img_width, img_height)) - 0.5) x += random_jitter # run L-BFGS for 7 steps x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.grads, maxfun=7) print('Current loss value:', min_val) # decode the dream and save it x = x.reshape((3, img_width, img_height)) x -= random_jitter img = deprocess_image(x) fname = result_prefix + '_at_iteration_%d.png' % i imsave(fname, img) end_time = time.time() print('Image saved as', fname) print('Iteration %d completed in %ds' % (i, end_time - start_time))
def print_generated(): print 'Image Generated!' wait_label = Label(root, text='Please wait for art to be generated...') wait_label.grid(row=10) # Set default height and width for all images height = 256 width = 256 # Open and resize original image original_image = Image.open(original_file_path) original_image = original_image.resize((height, width)) # Open and resize style image style_image = Image.open(style_file_path) style_image = style_image.resize((height, width)) # Convert images to numerical form and add exra dimension so they can be concatenated into tensor later original_array = np.asarray(original_image, dtype='float32') original_array = np.expand_dims(original_array, axis=0) style_array = np.asarray(style_image, dtype='float32') style_array = np.expand_dims(style_array, axis=0) # Subtract mean RGB value from each pixel and create inverse of image arrays (more efficient for model to train on) original_array[:, :, :, 0] -= 103.939 original_array[:, :, :, 1] -= 116.779 original_array[:, :, :, 2] -= 123.68 original_array = original_array[:, :, :, ::-1] style_array[:, :, :, 0] -= 103.939 style_array[:, :, :, 1] -= 116.779 style_array[:, :, :, 2] -= 123.68 style_array = style_array[:, :, :, ::-1] # Create backend variables for both image arrays original_image = backend.variable(original_array) style_image = backend.variable(style_array) # Create placeholder for combined image with same dimensions combined_image = backend.placeholder((1, height, width, 3)) # Create tensor with all three images: original, style, and output input_tensor = backend.concatenate( [original_image, style_image, combined_image], axis=0) # Initialize VGG16 model in Keras and set default image classification weights model = VGG16(input_tensor=input_tensor, weights='imagenet', include_top=False) # Create dictionary of layers in VGG16 model layers = dict([(layer.name, layer.output) for layer in model.layers]) # Set arbitrary weights for content, style, and total variation loss content_weight = 0.035 style_weight = 5.0 total_variation_weight = 1.0 # Initialize complete loss to 0 complete_loss = backend.variable(0.) 
# Choose a hidden layer in VGG16 network and extract features for original and combined images in this layer layer_features = layers['block2_conv2'] original_image_features = layer_features[0, :, :, :] combined_image_features = layer_features[2, :, :, :] # Add weighted content loss to complete loss complete_loss += content_weight * content_loss(original_image_features, combined_image_features) # Create list of hidden layers that we can extract features about style and combined images feature_layers = [ 'block1_conv2', 'block2_conv2', 'block3_conv3', 'block4_conv3', 'block5_conv3' ] # Loop over every layer in feature_layers for layer in feature_layers: # Extract features of all images in layer layer_features = layers[layer] # Extract features of style and combined images style_features = layer_features[1, :, :, :] combined_features = layer_features[2, :, :, :] # Calculate style loss of style and combined images, and add weighted style loss to complete loss sl = style_loss(style_features, combined_features) complete_loss += (style_weight / len(feature_layers)) * sl # Add weighted total variation loss to complete loss complete_loss += total_variation_weight * total_variation_loss( combined_image) # Calculate gradients for complete loss relative to combined image gradients = backend.gradients(complete_loss, combined_image) outputs = [complete_loss] outputs += gradients f_outputs = backend.function([combined_image], outputs) def find_loss_and_gradients(x): x = x.reshape((1, height, width, 3)) outs = f_outputs([x]) loss_value = outs[0] gradient_values = outs[1].flatten().astype('float64') return loss_value, gradient_values class Evaluator(object): # Returns loss and gradients in two separate functions def __init__(self): # Initialize loss and gradient values as None self.loss_value = None self.gradient_values = None def loss(self, x): # Returns loss value is loss value has not already been computed assert self.loss_value is None loss_value, gradient_values = find_loss_and_gradients(x) self.loss_value = loss_value self.gradient_values = gradient_values return self.loss_value def grads(self, x): # Returns gradient value if loss value has been computed assert self.loss_value is not None gradient_values = np.copy(self.gradient_values) self.loss_value = None self.gradient_values = None return gradient_values evaluator = Evaluator() # Create random initial guesses for optimization function x = np.random.uniform(0, 255, (1, height, width, 3)) - 128. # Set number of iterations of optimization iterations = 10 print 'reached till here' for i in range(0, iterations): x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.grads, maxfun=20) print i x = x.reshape((height, width, 3)) x = x[:, :, ::-1] x[:, :, 0] += 103.939 x[:, :, 1] += 116.779 x[:, :, 2] += 123.68 x = np.clip(x, 0, 255).astype('uint8') output_image = Image.fromarray(x) output_image.show() output_image.save('resultartist123' '.bmp')
def PLD_then_GP(q, id, kernel, kinit, kbounds, maxfun, datadir): ''' ''' global data if data is None: data = GetData(id, data_type='bkg', datadir=datadir) qd = data[q] if qd['time'] == []: return None # t/y for all chunks t_all = np.array([], dtype=float) y_all = np.array([], dtype=float) # Solve the (linear) PLD problem for this quarter fpix = np.array([x for y in qd['fpix'] for x in y]) fsum = np.sum(fpix, axis=1) init = np.array([np.median(fsum)] * fpix.shape[1]) def pm(y, *x): return np.sum(fpix * np.outer(1. / fsum, x), axis=1) x, _ = curve_fit(pm, None, fsum, p0=init) # Here's our detrended data y = [] t = [] e = [] for time, fpix, perr in zip(qd['time'], qd['fpix'], qd['perr']): # The pixel model fsum = np.sum(fpix, axis=1) pixmod = np.sum(fpix * np.outer(1. / fsum, x), axis=1) # The errors X = np.ones_like(time) + pixmod / fsum B = X.reshape(len(time), 1) * perr - x * perr / fsum.reshape( len(time), 1) yerr = np.sum(B**2, axis=1)**0.5 # Append to our arrays y.append(fsum - pixmod) t.append(time) e.append(yerr) # Now we solve for the best GP params res = fmin_l_bfgs_b(NegLnLikeGP, kinit, approx_grad=False, args=(t, y, e, kernel), bounds=kbounds, m=10, factr=1.e1, pgtol=1e-05, maxfun=maxfun) # Finally, detrend the data kernel.pars = res[0] gp = george.GP(kernel) for ti, yi, ei in zip(t, y, e): gp.compute(ti, ei) mu, _ = gp.predict(yi, ti) y_all = np.append(y_all, yi - mu) t_all = np.append(t_all, ti) return t_all, y_all
def minimize_cost_function(initial_theta, function_parameters): """Minimize the Cost Function""" return fmin_l_bfgs_b(cost_function_wrapper, initial_theta, fprime=gradients_wrapper, args=[function_parameters])
# In[ ]: from scipy.optimize import fmin_l_bfgs_b import time result_prefix = 'style_transfer_result' iterations = 20 x = preprocess_image(target_image_path) x = x.flatten() for i in range(iterations): print('Iteration:', i) start_time = time.time() x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x, fprime=evaluator.grads, maxfun=20) print('Current loss value:', min_val) img = x.copy().reshape((img_height, img_width, 3)) img = deprocess_image(img) fname = result_prefix + '_at_iteration_%d.png' % i save_img(fname, img) end_time = time.time() print('Image saved as:', fname) print('Iteration %d completed in %ds' % (i, end_time - start_time)) # In[11]: from matplotlib import pyplot as plt # In[1]:
def fit(self, Y, X, weights): ''' Fits parameters of softmax regression using l-bfgs-b optimization procedure Parameters: ----------- X: numpy array of size 'n x m' Explanatory variables Y: numpy array of size 'n x 1' Dependent variables that need to be approximated k: int Number of classes weights: numpy array of size 'n x 1' Weighting for each observation ''' # initialise parameters n, m = np.shape(X) k = self.k # initiate parameters for fitting (avoids overparameterization) theta_initial = np.zeros([m, k - 1]) if self.theta is None: self.init_params(m, k) # Use previously fitted values for refitting, if weights in HME changed a # little this will provide much faster convergence since initialised parameters # will be near optimal point. theta_initial += self.theta[:, 1:] # save recovery parameters in case log-likelihood drops due to underflow theta_recovery = self.theta log_like_before = self.log_likelihood(X, Y, weights) # optimisation with lbfgsb fitter = lambda theta: cost_grad(theta, Y, X, k, weights) theta, J, D = fmin_l_bfgs_b(fitter, theta_initial, fprime=None, pgtol=self.tolerance, approx_grad=False, maxiter=self.max_iter) # theta with dimensionality m x k-1 theta = np.reshape(theta, (m, k - 1)) # transform to standard softmax representation with m x k dimensionality self.theta = np.concatenate([np.zeros([m, 1]), theta], axis=1) # check behaviour of log-likelihood log_like_after = self.log_likelihood(X, Y, weights) delta_log_like = (log_like_after - log_like_before) / n # Code below is for two following cases: # # CASE 1: # In process of fitting deep HME due to errors in floating point # operations and underflows, when weights change is small # ( errors seem to start when total change in weights is 1e-30 and smaller) # log-likelihood of model after refitting can be smaller than before. # If that happens then model uses old parameters instead of new # # CASE 2: # Softmax regression suffers from the same # drawback as logistic regression, in case of perfect # or near perfect separability the norm of parameters keeps increasing ( basically # multiplying optimal w by constant). In that case change in parameters does # not decrease, while change in log-likelihood is tiny. # if delta_log_like < self.stop_learning: self.theta = theta_recovery delta_log_like = 0 # save changes in likelihood and parameters delta = self.theta - theta_recovery self.delta_log_like = delta_log_like self.delta_param_norm = np.sum(np.dot(delta.T, delta))
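The (k-1)-column parameterization used above avoids the softmax overparameterization by pinning the first class to a zero column; a small sketch of how class probabilities are recovered from such a theta (the names below are illustrative, not part of the class above):

import numpy as np

def softmax_probs(X, theta_free):
    """theta_free has shape (m, k-1); class 0 gets an implicit zero column."""
    theta = np.concatenate([np.zeros((theta_free.shape[0], 1)), theta_free], axis=1)
    scores = X.dot(theta)
    scores -= scores.max(axis=1, keepdims=True)      # numerical stability
    e = np.exp(scores)
    return e / e.sum(axis=1, keepdims=True)

X_demo = np.random.randn(5, 3)                       # n=5 samples, m=3 features
theta_free = np.zeros((3, 2))                        # k=3 classes -> 2 free columns
print(softmax_probs(X_demo, theta_free))             # uniform 1/3 probabilities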
pl.figure(1) pl.clf() Z = X, Y = np.mgrid[-1.5:1.5:100j, -1.1:1.1:100j] # Complete in the additional dimensions with zeros Z = np.reshape(Z, (2, -1)).copy() Z.resize((100, Z.shape[-1])) Z = np.apply_along_axis(f, 0, Z) Z = np.reshape(Z, X.shape) pl.imshow(Z.T, cmap=pl.cm.gray_r, extent=[-1.5, 1.5, -1.1, 1.1], origin='lower') pl.contour(X, Y, Z, cmap=pl.cm.gnuplot) # A reference but slow solution: x_ref = optimize.fmin_powell(f, K[0], xtol=1e-10) t0 = time.time() x_bfgs = optimize.fmin_bfgs(f, K[0]) print 'BFGS: time %.1fs, error %.2f' % (time.time() - t0, np.sum((x_bfgs - x_ref)**2)) t0 = time.time() x_l_bfgs = optimize.fmin_l_bfgs_b(f, K[0], approx_grad=1)[0] print 'L-BFGS: time %.1fs, error %.2f' % (time.time() - t0, np.sum((x_l_bfgs - x_ref)**2)) # Plot our solution #pl.plot(x_min[0], x_min[1], 'r+', markersize=15) pl.show()
def wiener2(t, h, signal='Cauchy', noise='flat', return_PSDs=False, signal_params=None, noise_params=None, lowcut=None): """Compute a Wiener-filtered time-series This is a hacked version of the astroML wiener_filter to fit a Cauchy distribution rather than a Gaussian Also does gaussian and a two parameter model with cauchy + gaussian Parameters ---------- t : array_like evenly-sampled time series, length N h : array_like observations at each t signal : str (optional) currently only 'Cauchy' is supported noise : str (optional) currently only 'flat' is supported return_PSDs : bool (optional) if True, then return (PSD, P_S, P_N) signal_guess : tuple (optional) A starting guess at the parameters for the signal. If not specified, a suitable guess will be estimated from the data itself. (see Notes below) noise_guess : tuple (optional) A starting guess at the parameters for the noise. If not specified, a suitable guess will be estimated from the data itself. (see Notes below) Returns ------- h_smooth : ndarray a smoothed version of h, length N Notes ----- The Wiener filter operates by fitting a functional form to the PSD:: PSD = P_S + P_N The resulting frequency-space filter is given by:: Phi = P_S / (P_S + P_N) This entire operation is equivalent to a kernel smoothing by a kernel whose Fourier transform is Phi. Choosing Signal/Noise Parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the arguments ``signal_guess`` and ``noise_guess`` specify the initial guess for the characteristics of signal and noise used in the minimization. They are generally expected to be tuples, and the meaning varies depending on the form of signal and noise used. For ``gaussian``, the params are (amplitude, width). For ``flat``, the params are (amplitude,). See Also -------- scipy.signal.wiener : a static (non-adaptive) wiener filter """ # Validate signal if signal != 'Cauchy': raise ValueError("only signal='Cauchy' is supported") if signal_params is not None and len(signal_params) != 2: raise ValueError("signal_params should be length 2") # Validate noise if noise != 'flat': raise ValueError("only noise='flat' is supported") if noise_params is not None and len(noise_params) != 1: raise ValueError("noise_params should be length 1") # Validate t and hd t = np.asarray(t) h = np.asarray(h) if (t.ndim != 1) or (t.shape != h.shape): raise ValueError('t and h must be equal-length 1-dimensional arrays') # compute the PSD of the input N = len(t) Df = 1. / N / (t[1] - t[0]) f = fftpack.ifftshift(Df * (np.arange(N) - N / 2)) H = fftpack.fft(h) PSD = abs(H)**2 # fit signal/noise params if necessary if signal_params is None: amp_guess = np.max(PSD[1:]) width_guess = np.min(np.abs(f[PSD[1:] < np.mean(PSD[1:])])) signal_params = (amp_guess, width_guess) if noise_params is None: noise_params = (np.mean(PSD[1:]), ) # Set up the Wiener filter: # fit a model to the PSD: sum of signal form and noise form def signalL2(x, A, width): """ Lorentzian """ width = abs(width) + 1E-99 # prevent divide-by-zero errors L1 = A * (width / ((x)**2 + width**2)) return L1 def noise(x, n): return n * np.ones(x.shape) # use [1:] here to remove the zero-frequency term: we don't want to # fit to this for data with an offset. min_func = lambda v: np.sum( (PSD[1:] - signalL2(f[1:], v[0], v[1]) - noise(f[1:], v[2]))**2) v0 = tuple(signal_params) + tuple(noise_params) v, d1, d2 = optimize.fmin_l_bfgs_b(min_func, v0, approx_grad=True) P_S = signalL2(f, v[0], v[1]) P_N = noise(f, v[2]) #shall cutoff our filter at low frequency? 
#obviously not, that would be silly if lowcut != None: cutoff_freq = lowcut mask = f[1:] < cutoff_freq P_S[mask] = 0.0 Phi = P_S / (P_S + P_N) Phi[0] = 1 # correct for DC offset # Use Phi to filter and smooth the values h_smooth = fftpack.ifft(Phi * H) if not np.iscomplexobj(h): h_smooth = h_smooth.real if return_PSDs: return h_smooth, PSD, P_S, P_N, Phi else: return h_smooth
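A hypothetical call to wiener2 on a noisy, evenly sampled series; explicit signal and noise parameter guesses are passed (amplitude and width for the Lorentzian signal PSD, a single amplitude for the flat noise), and the frequency-space filter Phi = P_S / (P_S + P_N) is returned alongside the smoothed series:

import numpy as np

t = np.linspace(0.0, 10.0, 512)
h = np.sin(2 * np.pi * 0.5 * t) + 0.3 * np.random.randn(t.size)

h_smooth, PSD, P_S, P_N, Phi = wiener2(t, h,
                                       signal_params=(1.0, 1.0),   # illustrative guess
                                       noise_params=(0.1,),        # illustrative guess
                                       return_PSDs=True)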
cLayerName = 'block4_conv2' sLayerNames = [ 'block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', #'block5_conv1' ] P = get_feature_reps(x=cImArr, layer_names=[cLayerName], model=cModel)[0] As = get_feature_reps(x=sImArr, layer_names=sLayerNames, model=sModel) ws = np.ones(len(sLayerNames))/float(len(sLayerNames)) iterations = 25 ## Originally 600 x_val = gIm0.flatten() start = time.time() xopt, f_val, info = fmin_l_bfgs_b(calculate_loss, x_val, fprime=get_grad, maxiter=iterations, disp=True) xOut = postprocess_array(xopt) ## VP's CODE import matplotlib.pyplot as plt plt.imshow(xOut) plt.show() ## xIm = save_original_size(xOut) print('Image saved') end = time.time() print('Time taken: {}'.format(end-start))
def test_l_bfgs_b_funjac(self): # L-BFGS-B with fun and jac combined and extra arguments x, f, d = optimize.fmin_l_bfgs_b(self.fj, [0, -1], args=(2.0, ), bounds=self.bounds) assert_(d['warnflag'] == 0, d['task']) assert_allclose(x, self.solution, atol=1e-6)
loss, gradients = evaluate_loss_and_gradients(x) self._gradients = gradients return loss def gradients(self, x): return self._gradients evaluator = Evaluator() # Optimize loss using Limited-memory BFGS algorithm x = np.random.uniform(0, 255, (1, IMAGE_HEIGHT, IMAGE_WIDTH, 3)) - 128. for i in range(ITERATIONS): x, loss, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.gradients, maxfun=20) print("Iteration %d completed with loss %d" % (i, loss)) x = x.reshape((IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS)) x = x[:, :, ::-1] x[:, :, 0] += IMAGENET_MEAN_RGB_VALUES[2] x[:, :, 1] += IMAGENET_MEAN_RGB_VALUES[1] x[:, :, 2] += IMAGENET_MEAN_RGB_VALUES[0] x = np.clip(x, 0, 255).astype("uint8") output_image = Image.fromarray(x) output_image.save(output_image_path) # Visualizing combined results combined = Image.new("RGB", (IMAGE_WIDTH * 3, IMAGE_HEIGHT)) x_offset = 0
def test_l_bfgs_b_bounds(self): x, f, d = optimize.fmin_l_bfgs_b(self.fun, [0, -1], fprime=self.jac, bounds=self.bounds) assert_(d['warnflag'] == 0, d['task']) assert_allclose(x, self.solution, atol=1e-6)
def fit(self,X,Y,disp=False,maxiter=1000,pgtol = 1e-5,drop_first=None): ps0 = self.get_params() x,f,d = fmin_l_bfgs_b(self.errf,ps0,fprime=self.errf_grad,args=([X,Y,drop_first],),factr=10,pgtol=pgtol,disp=disp,maxiter=maxiter)
def optimize_pt(c, b, comp, pend, vals, model): ret = spo.fmin_l_bfgs_b(model.grad_optimize_ei_over_hypers, c.flatten(), args=(comp, pend, vals), bounds=b, disp=0) return ret[0]
def find_optimum(GInit, G, y, yTilde, YTilde, theta, cfg): """ Find the optimal solution for the BioEn problem using numerical optimization. Gradients are calculated analytically, which typically gives a large speed up. Parameters ---------- GInit: array_like, vector with N components, starting value for optimization G: array_like, vector with N components, derived from BioEn inital weights (reference probabilities) y: array_like, MxN matrix yTilde: array_like, MxN matrix YTilde: array_like, vector with M components theta: float, confidence parameter Returns ------- wopt: optimized weights yopt: measurement value after refinement fmin_initial: float, starting negative log-likelihood (optional) fmin_final: float, final negative log-lilelihood (optional) """ check_params_logweights(GInit, G, y, yTilde, YTilde) minimizer = cfg["minimizer"] caching = cfg["cache_ytilde_transposed"] if (caching == "auto"): m = yTilde.shape[0] n = yTilde.shape[1] caching = common.set_caching_heuristics(m, n) cfg["cache_ytilde_transposed"] = caching g = GInit.copy() gPrime = np.asarray(g[:].T)[0] fmin_initial = bioen_log_posterior_base(gPrime, g, G, yTilde, YTilde, theta) if cfg["verbose"]: print("fmin_initial", fmin_initial) start = time.time() if cfg["minimizer"].upper() == 'LIBLBFGS' or cfg["minimizer"].upper( ) == "LBFGS": common.print_highlighted("LOGW -- Library L-BFGS/C", cfg["verbose"]) res = c_bioen.bioen_opt_lbfgs_logw(gPrime, G, yTilde, YTilde, theta, cfg) elif cfg["minimizer"].upper() == 'GSL': common.print_highlighted("LOGW -- Library GSL/C", cfg["verbose"]) res = c_bioen.bioen_opt_bfgs_logw(gPrime, G, yTilde, YTilde, theta, cfg) elif cfg["minimizer"].upper( ) == 'SCIPY' and cfg["use_c_functions"] == True: common.print_highlighted("LOGW -- Library scipy/C", cfg["verbose"]) caching = cfg["cache_ytilde_transposed"] if cfg["algorithm"].lower() == 'lbfgs' or cfg["algorithm"].lower( ) == "fmin_l_bfgs_b": common.print_highlighted('method L-BFGS', cfg["verbose"]) if cfg["verbose"]: print("\t", "=" * 25) print("\t", "caching_yTilde_transposed : ", caching) print("\t", "epsilon : ", cfg["params"]["epsilon"]) print("\t", "pgtol : ", cfg["params"]["pgtol"]) print("\t", "maxiter : ", cfg["params"]["max_iterations"]) print("\t", "=" * 25) res = sopt.fmin_l_bfgs_b( c_bioen.bioen_log_posterior_logw, gPrime, args=(g, G, yTilde, YTilde, theta, caching), fprime=c_bioen.grad_bioen_log_posterior_logw, epsilon=cfg["params"]["epsilon"], pgtol=cfg["params"]["pgtol"], maxiter=cfg["params"]["max_iterations"], disp=cfg["verbose"]) elif cfg["algorithm"].lower() == 'bfgs' or cfg["algorithm"].lower( ) == "fmin_bfgs": common.print_highlighted('method BFGS', cfg["verbose"]) if cfg["verbose"]: print("\t", "=" * 25) print("\t", "caching_yTilde_transposed : ", caching) print("\t", "epsilon : ", cfg["params"]["epsilon"]) print("\t", "gtol : ", cfg["params"]["gtol"]) print("\t", "maxiter : ", cfg["params"]["max_iterations"]) print("\t", "=" * 25) res = sopt.fmin_bfgs(c_bioen.bioen_log_posterior_logw, gPrime, args=(g, G, yTilde, YTilde, theta, caching), fprime=c_bioen.grad_bioen_log_posterior_logw, epsilon=cfg["params"]["epsilon"], gtol=cfg["params"]["gtol"], maxiter=cfg["params"]["max_iterations"], disp=cfg["verbose"], full_output=True) elif cfg["algorithm"].lower() == 'cg' or cfg["algorithm"].lower( ) == 'fmin_cg': common.print_highlighted('method CG', cfg["verbose"]) if cfg["verbose"]: print("\t", "=" * 25) print("\t", "caching_yTilde_transposed : ", caching) print("\t", "epsilon : ", cfg["params"]["epsilon"]) print("\t", "gtol : ", 
cfg["params"]["gtol"]) print("\t", "maxiter : ", cfg["params"]["max_iterations"]) print("\t", "=" * 25) res = sopt.fmin_cg(c_bioen.bioen_log_posterior_logw, gPrime, args=(g, G, yTilde, YTilde, theta, caching), fprime=c_bioen.grad_bioen_log_posterior_logw, epsilon=cfg["params"]["epsilon"], gtol=cfg["params"]["gtol"], maxiter=cfg["params"]["max_iterations"], disp=cfg["verbose"], full_output=True) else: raise RuntimeError( "Method '" + cfg["algorithm"] + "' not recognized for scipy/c library (valid values = 'lbfgs', 'bfgs', 'cg' ) " ) elif cfg["minimizer"].upper( ) == 'SCIPY' and cfg["use_c_functions"] == False: common.print_highlighted("LOGW -- Library scipy/PY", cfg["verbose"]) if cfg["algorithm"].lower() == 'lbfgs' or cfg["algorithm"].lower( ) == "fmin_l_bfgs_b": common.print_highlighted('method L-BFGS', cfg["verbose"]) if cfg["verbose"]: print("\t", "=" * 25) print("\t", "epsilon ", cfg["params"]["epsilon"]) print("\t", "pgtol ", cfg["params"]["pgtol"]) print("\t", "maxiter ", cfg["params"]["max_iterations"]) print("\t", "=" * 25) res = sopt.fmin_l_bfgs_b(bioen_log_posterior_base, gPrime, args=(g, G, yTilde, YTilde, theta), fprime=grad_bioen_log_posterior_base, epsilon=cfg["params"]["epsilon"], pgtol=cfg["params"]["pgtol"], maxiter=cfg["params"]["max_iterations"], disp=cfg["verbose"]) elif cfg["algorithm"].lower() == 'bfgs' or cfg["algorithm"].lower( ) == "fmin_bfgs": common.print_highlighted('method BFGS', cfg["verbose"]) if cfg["verbose"]: print("\t", "=" * 25) print("\t", "epsilon ", cfg["params"]["epsilon"]) print("\t", "gtol ", cfg["params"]["gtol"]) print("\t", "maxiter ", cfg["params"]["max_iterations"]) print("\t", "=" * 25) res = sopt.fmin_bfgs(bioen_log_posterior_base, gPrime, args=(g, G, yTilde, YTilde, theta), fprime=grad_bioen_log_posterior_base, epsilon=cfg["params"]["epsilon"], gtol=cfg["params"]["gtol"], maxiter=cfg["params"]["max_iterations"], disp=cfg["verbose"], full_output=True) elif cfg["algorithm"].lower() == 'cg' or cfg["algorithm"].lower( ) == 'fmin_cg': common.print_highlighted('method CG', cfg["verbose"]) if cfg["verbose"]: print("\t", "=" * 25) print("\t", "epsilon ", cfg["params"]["epsilon"]) print("\t", "gtol ", cfg["params"]["gtol"]) print("\t", "maxiter ", cfg["params"]["max_iterations"]) print("\t", "=" * 25) res = sopt.fmin_cg(bioen_log_posterior_base, gPrime, args=(g, G, yTilde, YTilde, theta), fprime=grad_bioen_log_posterior_base, epsilon=cfg["params"]["epsilon"], gtol=cfg["params"]["gtol"], maxiter=cfg["params"]["max_iterations"], disp=cfg["verbose"], full_output=True) else: raise RuntimeError( "Method '" + cfg["algorithm"] + "' not recognized for scipy/py library (valid values = 'lbfgs', 'bfgs', 'cg' ) " ) else: raise RuntimeError( "Library " + cfg["minimizer"] + " not recognized (valid values = 'LIBLBFGS', 'GSL', 'scipy', 'scipy' ) " ) end = time.time() if cfg["verbose"]: print('time elapsed ', (end - start)) gopt = res[0] fmin_final = res[1] wopt = getWOpt(G, gopt) yopt = common.getAve(wopt, y) if cfg["verbose"]: print("========================") print("fmin_initial = ", fmin_initial) print("fmin_final = ", fmin_final) print("========================") return wopt, yopt, gopt, fmin_initial, fmin_final