def pgd_regression(home,project_name,run_name,run_number,norm=2):
    '''
    Regress for PGD scaling law
    '''
    from numpy import genfromtxt,array,zeros,log10,expand_dims,ones,diag,c_
    from string import replace
    from obspy.geodetics.base import gps2dist_azimuth
    from l1 import l1
    from cvxopt import matrix
    from scipy.linalg import norm as vecnorm
    from numpy.linalg import lstsq

    # Read summary file
    summary_file=home+project_name+'/output/waveforms/'+run_name+'.'+run_number+'/_summary.'+run_name+'.'+run_number+'.txt'
    lonlat=genfromtxt(summary_file,usecols=[1,2])
    pgd=genfromtxt(summary_file,usecols=[6])*100

    # Get hypocenter or centroid
    event_log=home+project_name+'/output/ruptures/'+run_name+'.'+run_number+'.log'
    f=open(event_log,'r')
    loop_go=True
    while loop_go:
        line=f.readline()
        if 'Centroid (lon,lat,z[km])' in line:
            s=replace(line.split(':')[-1],'(','')
            s=replace(s,')','')
            hypo=array(s.split(',')).astype('float')
            loop_go=False
        if 'Actual magnitude' in line:
            Mw=float(line.split(':')[-1].split(' ')[-1])
    f.close()

    # Compute station to hypocenter distances (in km)
    d=zeros(len(lonlat))
    for k in range(len(lonlat)):
        d[k],az,baz=gps2dist_azimuth(lonlat[k,1],lonlat[k,0],hypo[1],hypo[0])
        d[k]=d[k]/1000

    # Run regression
    #W=ones(len(d))/vecnorm(log10(d))
    W=ones(len(d))
    # Define regression quantities
    dist=log10(d)
    data=log10(pgd)
    # Make matrix of data weights
    W=diag(W)
    # Assemble design matrix
    iall=ones((len(d),1))
    Mw_all=Mw*ones(len(d))
    G=c_[iall,expand_dims(Mw_all,1)*iall,expand_dims(Mw_all*dist,1)]
    # Run regression
    # log10(PGD)=A+B*Mw+C*Mw*log10(R)
    if norm==2:
        coefficients=lstsq(G,data)[0]
        A=coefficients[0] ; B=coefficients[1] ; C=coefficients[2]
    elif norm==1:
        P=matrix(W.dot(G))
        q=matrix(W.dot(data))
        coefficients=array(l1(P,q)).squeeze()
        A=coefficients[0] ; B=coefficients[1] ; C=coefficients[2]
    return A,B,C
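# Usage sketch for pgd_regression (hedged): the function reads a FakeQuakes-style
# project from disk, so no real call is shown here. Once A, B, C are known, the
# fitted scaling law log10(PGD [cm]) = A + B*Mw + C*Mw*log10(R [km]) can be
# evaluated directly. predict_pgd below is a hypothetical helper and the example
# coefficients are placeholders, not published values.
import numpy as np

def predict_pgd(A, B, C, Mw, R_km):
    '''Evaluate the PGD scaling law for magnitude Mw and hypocentral distance R_km.'''
    return 10**(A + B*Mw + C*Mw*np.log10(R_km))

# e.g. A, B, C = pgd_regression(home, project_name, run_name, run_number, norm=1)
#      pgd_cm = predict_pgd(A, B, C, Mw=8.0, R_km=100.0)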
def l1_fit(index, y, beta_d2=1.0, beta_d1=1.0, beta_seasonal=1.0,
           beta_step=5.0, period=12, growth=0.0, step_permissives=None):
    assert isinstance(y, np.ndarray)
    assert isinstance(index, np.ndarray)
    # index must be integer type for seasonality to make sense
    assert index.dtype.kind == 'i'

    n = len(y)
    m = n-2
    p = period
    ys, y_min, y_max = mu.scale_numpy(y)

    D1 = mu.get_first_derivative_matrix_nes(index)
    D2 = mu.get_second_derivative_matrix_nes(index)
    H = mu.get_step_function_matrix(n)
    T = mu.get_T_matrix(p)
    B = mu.get_B_matrix_nes(index, p)
    Q = B*T

    # define F_matrix from blocks like in paper
    zero = mu.zero_spmatrix
    ident = mu.identity_spmatrix
    gvec = spmatrix(growth, range(m), [0]*m)
    zero_m = spmatrix(0.0, range(m), [0]*m)
    zero_p = spmatrix(0.0, range(p), [0]*p)
    zero_n = spmatrix(0.0, range(n), [0]*n)
    step_reg = mu.get_step_function_reg(n, beta_step, permissives=step_permissives)
    F_matrix = sparse([
        [ident(n), -beta_d1*D1, -beta_d2*D2, zero(p, n), zero(n)],
        [Q, zero(m, p-1), zero(m, p-1), -beta_seasonal*T, zero(n, p-1)],
        [H, zero(m, n), zero(m, n), zero(p, n), step_reg]
    ])
    w_vector = sparse([
        mu.np2spmatrix(ys),
        gvec,
        zero_m,
        zero_p,
        zero_n
    ])

    solution_vector = np.asarray(l1.l1(matrix(F_matrix), matrix(w_vector))).squeeze()

    # separate into components
    xbase = solution_vector[0:n]
    s = solution_vector[n:n+p-1]
    h = solution_vector[n+p-1:]

    # scale back to original
    if y_max > y_min:
        scaling = y_max - y_min
    else:
        scaling = 1.0
    xbase = xbase*scaling + y_min
    s = s*scaling
    h = h*scaling

    seas = np.asarray(Q*matrix(s)).squeeze()
    steps = np.asarray(H*matrix(h)).squeeze()
    x = xbase + seas + steps
    solution = {'xbase': xbase, 'seas': seas, 'steps': steps, 'x': x, 'h': h, 's': s}
    return solution
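# Usage sketch for l1_fit (hedged): assumes the module-level names this function
# relies on (numpy as np, cvxopt's sparse/spmatrix/matrix, the l1 solver and the
# matrix-utility module mu) are importable; the monthly toy series is made up.
import numpy as np

index = np.arange(72)                  # integer time index, required for seasonality
y = 0.1*index + 2.0*np.sin(2*np.pi*index/12.0) + 0.3*np.random.randn(len(index))
fit = l1_fit(index, y, beta_seasonal=2.0, period=12)
trend, seasonal, model = fit['xbase'], fit['seas'], fit['x']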
def test_l1(self):
    from cvxopt import normal, setseed
    import l1
    setseed(100)
    m, n = 500, 250
    P = normal(m, n)
    q = normal(m, 1)
    u1 = l1.l1(P, q)
    u2 = l1.l1blas(P, q)
    self.assertAlmostEqualLists(list(u1), list(u2), places=3)
def test_l1():
    np.random.seed(42)
    m, n = 500, 100
    P, q = cvxopt.normal(m, n), cvxopt.normal(m, 1)
    u = l1(P, q)
    qfit = P * u
    residual = qfit - q
    np.random.seed(None)
    mean_abs_res = sum(abs(residual)) / len(residual)
    print "mean abs residual: %s" % mean_abs_res
    assert mean_abs_res < 1.0
def l1tf_cvxopt_l1p(y, alpha, period=0, eta=1.0):
    n = y.size[0]
    m = n - 2
    D = get_second_derivative_matrix(n)
    P = D * D.T
    q = -D * y

    n_constraints = m
    if period > 1:
        n_constraints += (period-1)
    G = zero_spmatrix(2 * n_constraints, m)
    G[:m, :m] = identity_spmatrix(m)
    G[m:2*m, :m] = - identity_spmatrix(m)
    h = matrix(alpha, (2 * n_constraints, 1), tc='d')
    if period > 1:
        B = get_B_matrix(n, period)
        T = get_T_matrix(period)
        Q = B*T
        DQ = D * Q
        G[2*m:2*m+period-1, :m] = DQ.T
        G[2*m+period-1:, :m] = -DQ.T
        h[2*m:] = eta

    res = solvers.qp(P, q, G, h)
    nu = res['x']
    DT_nu = D.T * nu

    output = {}
    output['y'] = y
    output['x_with_seasonal'] = y - DT_nu
    if period > 1:
        # separate seasonal from non-seasonal by solving a
        # least norm problem
        ratio = eta/alpha
        Pmat = zero_spmatrix(m+period, period-1)
        Pmat[:m, :period-1] = DQ
        Pmat[m:(m+period), :period-1] = -ratio * T
        qvec = matrix(0.0, (m+period, 1), tc='d')
        qvec[:m] = D*(y-DT_nu)
        p_solution = l1.l1(matrix(Pmat), qvec)
        QP_solution = Q*p_solution
        output['p'] = p_solution
        output['s'] = QP_solution
        output['x'] = output['x_with_seasonal'] - output['s']
        print 'sum seasonal is: %s' % sum(output['s'][:period])
    return output
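# Usage sketch for l1tf_cvxopt_l1p (hedged): assumes the helpers it calls
# (get_second_derivative_matrix, get_B_matrix, get_T_matrix, solvers, l1, ...)
# are available in the same module; the signal and parameter values are made up.
import numpy as np
from cvxopt import matrix

t = np.arange(120)
signal = 0.05*t + np.sin(2*np.pi*t/12.0) + 0.1*np.random.randn(len(t))
y = matrix(signal)                                 # solver expects a cvxopt column vector
out = l1tf_cvxopt_l1p(y, alpha=50.0, period=12, eta=1.0)
trend = np.asarray(out['x']).squeeze()             # deseasonalized L1 trend
seasonal = np.asarray(out['s']).squeeze()          # estimated seasonal component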
def PGD(pgd, r, coefficients, weight=True, norm=2, residual=False):
    '''
    Run PGD regression

    IN:
        pgd is pgd data
        r are station/event distances
        coefficients is a 3 vector with A, B and C
        weight is a boolean to apply distance weighting
        norm switches between L1 and L2 norm minimizing solver
    '''
    mintol = 1e-5   # This value is used instead of zero

    # Define regression coefficients
    A = coefficients[0]
    B = coefficients[1]
    C = coefficients[2]

    # Decide whether to apply distance weights
    if weight:
        #W = expand_dims(exp(-power(1./log10(pgd),2)/2/power(2*min(1./log10(pgd)),2)),1)
        W = expand_dims(exp(-power(r, 2) / 2 / power(2 * min(r), 2)), 1)
        #W = expand_dims(exp(-power(r,3)/2/power(2*min(r),3)),1)
    else:
        W = expand_dims(ones(len(r)), 1)

    # Green's functions
    G = expand_dims(B + C * (log10(r)), 1)

    # Clean data for small values
    i = where(pgd < mintol)[0]
    pgd[i] = mintol

    # Define data vector
    b = expand_dims(log10(pgd) - A, 1)

    # L1 or L2 norm minimizing solver
    if norm == 2:
        P = W * G
        q = W * b
        M = lstsq(P, q)[0]
        R = P.dot(M) - q
        res = vecnorm(R)
    elif norm == 1:
        P = matrix(W * G)
        q = matrix(W * b)
        print P
        print q
        M = l1(P, q)[0]
        # get residuals
        res = sum(abs(P * matrix(M) - q))

    if residual:
        return M, res
    else:
        return M
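# Usage sketch for PGD (hedged): the regression coefficients below are placeholders,
# not a published set; assumes the module-level numpy/cvxopt imports this function
# relies on (expand_dims, log10, where, lstsq, vecnorm, matrix, l1, ...) are present.
import numpy as np

r = np.array([25., 60., 110., 180.])      # station-event distances [km]
pgd_obs = np.array([45., 18., 8., 4.])    # observed PGD values
coefficients = [-4.4, 1.0, -0.14]         # placeholder A, B, C
Mw_est, res = PGD(pgd_obs, r, coefficients, weight=True, norm=2, residual=True)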
def plot_l1_trend_fits(x, delta_values=(1, 5, 10)):
    plt.figure(figsize=(16, 12))
    plt.suptitle('Different trends for different $\delta$ s')
    for ii, delta in enumerate(delta_values):
        plt.subplot(len(delta_values), 1, ii + 1)
        filtered = l1(x, delta)
        plt.plot(x, label='Original signal')
        label = 'Filtered, $\delta$ = {}'.format(delta)
        plt.plot(filtered, linewidth=5, label=label, alpha=0.5)
        plt.legend(loc='best')
    fig_name = 'l1_trend_filtering_snp_{}.png'.format(len(x))
    fig_path = os.path.join(_FIG_DIR, fig_name)
    plt.savefig(fig_path, format='png', dpi=1000)
def trend_extraction(sample, season_len, reg1=10., reg2=0.5):
    sample_len = len(sample)
    season_diff = sample[season_len:] - sample[:-season_len]
    assert len(season_diff) == (sample_len - season_len)
    q = np.concatenate([season_diff, np.zeros([sample_len*2-3])])
    q = np.reshape(q, [len(q), 1])
    q = matrix(q)

    M = get_toeplitz([sample_len-season_len, sample_len-1], np.ones([season_len]))
    D = get_toeplitz([sample_len-2, sample_len-1], np.array([1, -1]))
    P = np.concatenate([M, reg1*np.eye(sample_len-1), reg2*D], axis=0)
    P = matrix(P)

    delta_trends = l1(P, q)
    relative_trends = get_relative_trends(delta_trends)

    return sample-relative_trends, relative_trends
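# Usage sketch for trend_extraction (hedged): assumes get_toeplitz, get_relative_trends,
# the l1 solver and cvxopt's matrix are available in this module; the synthetic series
# with a 24-sample season is made up.
import numpy as np

t = np.arange(200)
series = 0.02*t + np.sin(2*np.pi*t/24.0) + 0.05*np.random.randn(len(t))
deseasonalized, relative_trend = trend_extraction(series, season_len=24)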
def fit_auto(X, Y, k, do_l1=False):
    """
    Yi = [Xi-1 Xi-2 ... Xi-1-k]*[a1 a2 ... ak]T
    Solves for the a_k's : auto-regression.
    """
    assert k < len(X), "order more than the length of the vector."
    assert X.ndim == 1 and Y.ndim == 1, "Vectors are not one-dimensional."
    assert len(X) == len(Y), "Vectors are not of the same size."

    A = get_auto_mat2(X, k)
    A = np.c_[A, np.ones(A.shape[0])]
    b = Y[k-1:]
    if do_l1:
        sol = np.array(l1(cvx.matrix(A), cvx.matrix(b)))
        sol = np.reshape(sol, np.prod(sol.shape))
    else:
        sol = np.linalg.lstsq(A, b)[0]
    return [A, b, sol]
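# Usage sketch for fit_auto (hedged): assumes get_auto_mat2 and the module-level
# np/cvx/l1 imports are present; the AR(2) toy series is made up. With do_l1=True
# the coefficients are fit by minimizing the L1 residual norm instead of least squares.
import numpy as np

rng = np.random.RandomState(0)
x = np.zeros(300)
for i in range(2, len(x)):
    x[i] = 0.6*x[i-1] - 0.3*x[i-2] + rng.randn()
A, b, sol = fit_auto(x, x, k=2, do_l1=True)   # sol: AR coefficients plus an intercept term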
def timeseries_inversion_L1(h5flat, h5timeseries):
    try:
        from l1 import l1
        from cvxopt import normal, matrix
    except:
        print '-----------------------------------------------------------------------'
        print 'cvxopt should be installed to be able to use the L1 norm minimization.'
        print '-----------------------------------------------------------------------'
        sys.exit(1)
    # modified from sbas.py written by scott baker, 2012

    total = time.time()
    A, B = design_matrix(h5flat)
    tbase, dateList, dateDict, dateDict2 = date_list(h5flat)
    dt = np.diff(tbase)
    BL1 = matrix(B)
    B1 = np.linalg.pinv(B)
    B1 = np.array(B1, np.float32)
    ifgramList = h5flat['interferograms'].keys()
    numIfgrams = len(ifgramList)
    # dset = h5flat[ifgramList[0]].get(h5flat[ifgramList[0]].keys()[0])
    # data = dset[0:dset.shape[0],0:dset.shape[1]]
    dset = h5flat['interferograms'][ifgramList[0]].get(ifgramList[0])
    data = dset[0:dset.shape[0], 0:dset.shape[1]]
    numPixels = np.shape(data)[0] * np.shape(data)[1]
    print 'Reading in the interferograms'
    print numIfgrams, numPixels

    # data = np.zeros((numIfgrams,numPixels),np.float32)
    data = np.zeros((numIfgrams, numPixels))
    for ni in range(numIfgrams):
        dset = h5flat['interferograms'][ifgramList[ni]].get(ifgramList[ni])
        # dset = h5flat[ifgramList[ni]].get(h5flat[ifgramList[ni]].keys()[0])
        d = dset[0:dset.shape[0], 0:dset.shape[1]]
        # print np.shape(d)
        data[ni] = d.flatten(1)
        del d

    dataPoint = np.zeros((numIfgrams, 1), np.float32)
    modelDimension = np.shape(B)[1]
    tempDeformation = np.zeros((modelDimension + 1, numPixels), np.float32)
    print data.shape
    DataL1 = matrix(data)
    L1ORL2 = np.ones((numPixels, 1))
    for ni in range(numPixels):
        print ni
        dataPoint = data[:, ni]
        nan_ndx = dataPoint == 0.
        fin_ndx = dataPoint != 0.
        nan_fin = dataPoint.copy()
        nan_fin[nan_ndx] = 1
        if not nan_fin.sum() == len(nan_fin):
            B1tmp = np.dot(B1, np.diag(fin_ndx))
            # tmpe_ratea = np.dot(B1tmp,dataPoint)
            try:
                tmpe_ratea = np.array(l1(BL1, DataL1[:, ni]))
                zero = np.array([0.], np.float32)
                defo = np.concatenate(
                    (zero, np.cumsum([tmpe_ratea[:, 0] * dt])))
            except:
                tmpe_ratea = np.dot(B1tmp, dataPoint)
                L1ORL2[ni] = 0
                zero = np.array([0.], np.float32)
                defo = np.concatenate((zero, np.cumsum([tmpe_ratea * dt])))
            tempDeformation[:, ni] = defo
        if not np.remainder(ni, 10000):
            print 'Processing point: %7d of %7d ' % (ni, numPixels)
    del data

    timeseries = np.zeros(
        (modelDimension + 1, np.shape(dset)[0], np.shape(dset)[1]), np.float32)
    factor = -1 * float(
        h5flat['interferograms'][ifgramList[0]].attrs['WAVELENGTH']) / (4. * np.pi)
    for ni in range(modelDimension + 1):
        timeseries[ni] = tempDeformation[ni].reshape(
            np.shape(dset)[1], np.shape(dset)[0]).T
        timeseries[ni] = timeseries[ni] * factor
    del tempDeformation
    L1ORL2 = np.reshape(L1ORL2, (np.shape(dset)[1], np.shape(dset)[0])).T

    timeseriesDict = {}
    for key, value in h5flat['interferograms'][ifgramList[0]].attrs.iteritems():
        timeseriesDict[key] = value

    dateIndex = {}
    for ni in range(len(dateList)):
        dateIndex[dateList[ni]] = ni
    if not 'timeseries' in h5timeseries:
        group = h5timeseries.create_group('timeseries')
        for key, value in timeseriesDict.iteritems():
            group.attrs[key] = value

    for date in dateList:
        if not date in h5timeseries['timeseries']:
            dset = group.create_dataset(date, data=timeseries[dateIndex[date]], compression='gzip')

    print 'Time series inversion took ' + str(time.time() - total) + ' secs'
    L1orL2h5 = h5py.File('L1orL2.h5', 'w')
    gr = L1orL2h5.create_group('mask')
    dset = gr.create_dataset('mask', data=L1ORL2, compression='gzip')
    L1orL2h5.close()
def l1_fit(index, y, beta_d2=1.0, beta_d1=1.0, beta_seasonal=1.0,
           beta_step=1000.0, growth=0.0, seasonality_matrix=None):
    """
    Least Absolute Deviation time-series fitting function,
    lower-level than the version operating on actual dates.

    :param index: ndarray, index of numeric x-values representing time
    :param y: ndarray, the time-series y-values
    :param beta_d2: L1 regularization parameter on the second derivative
    :param beta_d1: L1 regularization parameter on the first derivative
    :param beta_seasonal: L1 regularization parameter on the seasonal components
    :param beta_step: L1 regularization parameter on the step-function components
    :param growth: the default growth rate that is regularized toward, default 0
    :param seasonality_matrix: matrix which maps seasonality variables onto the
        index of data points; allows the problem to be written in purely matrix
        form. Comes from the get_seasonality_matrix function.
    :return: dict of fitted components
    """
    # print "beta_d2: %s" % beta_d2
    # print "beta_seasonal: %s" % beta_seasonal
    assert isinstance(y, np.ndarray)
    assert isinstance(index, np.ndarray)
    # x must be integer type for seasonality to make sense
    # assert index.dtype.kind == 'i'

    # dimensions
    n = len(y)
    m = n - 2
    p = seasonality_matrix.size[1]

    ys, y_min, y_max = mu.scale_numpy(y)

    # set up matrices
    d1 = mu.get_first_derivative_matrix_nes(index)
    d2 = mu.get_second_derivative_matrix_nes(index)
    h = mu.get_step_function_matrix(n)
    t = mu.get_T_matrix(p)
    q = seasonality_matrix * t

    zero = mu.zero_spmatrix
    ident = mu.identity_spmatrix
    gvec = spmatrix(growth, range(m), [0] * m)
    zero_m = spmatrix(0.0, range(m), [0] * m)
    zero_p = spmatrix(0.0, range(p), [0] * p)
    zero_n = spmatrix(0.0, range(n), [0] * n)

    # allow step-function regularization to change at some points
    # is this really needed?
    step_reg = mu.get_step_function_reg(n, beta_step)

    # define F_matrix from blocks like in the white paper
    # so that the problem can be stated as a standard LAD problem
    # and solved with the l1 program
    F_matrix = sparse(
        [
            [ident(n), -beta_d1 * d1, -beta_d2 * d2, zero(p, n), zero(n)],
            [q, zero(m, p - 1), zero(m, p - 1), -beta_seasonal * t, zero(n, p - 1)],
            [h, zero(m, n), zero(m, n), zero(p, n), step_reg],
        ]
    )

    # convert to sparse matrix
    w_vector = sparse([mu.np2spmatrix(ys), gvec, zero_m, zero_p, zero_n])

    # solve LAD problem and convert back to numpy array
    solution_vector = np.asarray(l1.l1(matrix(F_matrix), matrix(w_vector))).squeeze()

    # separate into components
    base = solution_vector[0:n]
    seasonal_parameters = solution_vector[n:n + p - 1]
    step_jumps = solution_vector[n + p - 1:]

    # scale back to original
    if y_max > y_min:
        scaling = y_max - y_min
    else:
        scaling = 1.0
    base = base * scaling + y_min
    seasonal_parameters *= scaling
    step_jumps *= scaling

    seasonal_component = np.asarray(q * matrix(seasonal_parameters)).squeeze()
    step_component = np.asarray(h * matrix(step_jumps)).squeeze()
    model_without_seasonal = base + step_component
    model = model_without_seasonal + seasonal_component

    solution = {
        "base": base,
        "seasonal_component": seasonal_component,
        "step_component": step_component,
        "model": model,
        "model_without_seasonal": model_without_seasonal,
        "step_jumps": step_jumps,
        "seasonal_parameters": seasonal_parameters,
    }
    return solution
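# Usage sketch for this l1_fit variant (hedged): the seasonality matrix is assumed
# to come from the get_seasonality_matrix helper mentioned in the docstring; its
# exact location and signature below are hypothetical, and the toy data are made up.
import numpy as np

index = np.arange(60)
y = 0.1*index + np.sin(2*np.pi*index/12.0) + 0.2*np.random.randn(len(index))
season_matrix = get_seasonality_matrix(index, period=12)   # hypothetical signature
fit = l1_fit(index, y, beta_seasonal=2.0, seasonality_matrix=season_matrix)
model, base = fit['model'], fit['base']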
def l1_regression(A, b):
    A = matrix(A)   # converting to cvxopt matrix, the format accepted by the solver
    b = matrix(b)
    return l1(A, b)
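# Usage sketch for l1_regression (hedged): a small overdetermined system with one
# gross outlier in b; values are made up. The L1 fit should be pulled less by the
# outlier than an ordinary least-squares fit on the same data.
import numpy as np

A = np.c_[np.ones(8), np.arange(8, dtype=float)]
b = 2.0 + 0.5*np.arange(8)
b[3] += 10.0                                 # inject an outlier
x_l1 = np.asarray(l1_regression(A, b)).squeeze()
x_l2 = np.linalg.lstsq(A, b)[0]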