def compute_moments(img):
    '''
    Compute the moments of the given image.

    Parameters
    ----------
    img : numpy.ndarray
        2D image.

    Returns
    -------
    mean : float
        The 1st moment.
    variance : float
        The 2nd moment.
    skewness : float
        The 3rd moment.
    kurtosis : float
        The 4th moment.
    '''
    mean = np.nanmean(img, axis=None)
    variance = np.nanstd(img, axis=None) ** 2.
    skewness = np.nansum(
        ((img - mean) / np.sqrt(variance)) ** 3.) / np.sum(~np.isnan(img))
    kurtosis = np.nansum(
        ((img - mean) / np.sqrt(variance)) ** 4.) / np.sum(~np.isnan(img)) - 3
    return mean, variance, skewness, kurtosis
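
# A minimal usage sketch for compute_moments, assuming numpy is imported as
# np at module level; the random image below is illustrative only. NaN
# pixels are skipped by the nan-aware reductions inside the function.
import numpy as np

img = np.random.default_rng(0).normal(size=(64, 64))
img[10, 10] = np.nan
mean, variance, skewness, kurtosis = compute_moments(img)
print(mean, variance, skewness, kurtosis)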
def moment3(xp, M1):
    '''
    Calculate the 3rd moment in a single direction of a set of tracks,
    in meters.

    Inputs:
     xp         x or y locations of the drifter tracks [ndrifter, ntime]
     M1         First moment of tracks in a single direction

    Outputs:
     M3         Third moment (skewness) of the tracks in a single
                direction [ntime].
     nnans      Number of non-nan time steps in calculations for averaging
                properly. Otherwise drifters that have exited the domain
                could affect calculations.

    To combine with other calculations of this moment, first multiply by
    nnans, then combine with the other calculations, then divide by the
    total number of nnans.

    Example call: tracpy.calcs.moment3(xp, M1x)
    '''
    tstart = time.time()

    dist = xp - M1
    nnans = np.sum(~np.isnan(dist), axis=0)
    num = np.nansum(dist**3, axis=0) / nnans
    # use a float exponent: 3/2 is integer division (=1) under Python 2
    denom = (np.nansum(dist**2, axis=0) / nnans)**1.5
    M3 = num / denom

    print('time for finding M: ', time.time() - tstart)

    # # Distances squared, separately; times; number of non-nans for this set
    # np.savez(name[:-3] + 'D2.npz', D2=D2, t=t, nnans=nnans)
    # pdb.set_trace()

    return M3, nnans
def logprob2dcc(p, x, y, x_err, y_err):
    theta, scatter, rho = p[0], p[1], p[2]
    if np.abs(theta - np.pi/4) > np.pi/4:
        return -np.inf
    if scatter < 0.0:
        return -np.inf
    if abs(rho) > 1.0:
        return -np.inf
    # if width < 0.0:
    #     return -np.inf
    sigxy = rho*x_err*y_err
    displacement = 0.0
    # Displacement away from the line
    Delta = (np.cos(theta)*y - np.sin(theta)*x)**2
    # Displacement from zero
    # delta = (np.sin(theta)*y + np.cos(theta)*x - displacement)**2
    Sigma = (np.sin(theta))**2*x_err**2 + (np.cos(theta))**2*y_err**2 + \
        scatter - 2*np.sin(theta)*np.cos(theta)*sigxy
    # sigma = (np.cos(theta))**2*x_err**2 + (np.sin(theta))**2*y_err**2 + width
    lp = -0.5*np.nansum(Delta/Sigma) - 0.5*np.nansum(np.log(Sigma))
    # lp = -0.5*np.nansum(Delta/Sigma) - 0.5*np.nansum(delta/sigma) - 0.5*np.nansum(np.log(sigma))
    # lp = -0.5*np.nansum(Delta/Sigma) - 0.5*np.log(scatter)*Sigma.size
    # lp = -0.5*np.nansum(Delta/Sigma) - 0.5*scipy.stats.norm.logpdf(scatter, 0, 1.0)
    return lp
def clean_weights(weights, must_haves=None, fraction=0.5):
    if must_haves is None:
        must_haves = [True] * len(weights)

    if not any(must_haves):
        return [0.0] * len(weights)

    needs_replacing = [(np.isnan(x) or x == 0.0) and must_haves[i]
                       for (i, x) in enumerate(weights)]
    keep_empty = [(np.isnan(x) or x == 0.0) and not must_haves[i]
                  for (i, x) in enumerate(weights)]
    no_replacement_needed = [(not keep_empty[i]) and (not needs_replacing[i])
                             for (i, x) in enumerate(weights)]

    if not any(needs_replacing):
        return weights

    missing_weights = sum(needs_replacing)
    total_for_missing_weights = fraction * missing_weights / (
        float(np.nansum(no_replacement_needed) + np.nansum(missing_weights)))
    adjustment_on_rest = (1.0 - total_for_missing_weights)
    each_missing_weight = total_for_missing_weights / missing_weights

    def _good_weight(value, idx, needs_replacing, keep_empty,
                     each_missing_weight, adjustment_on_rest):
        if needs_replacing[idx]:
            return each_missing_weight
        if keep_empty[idx]:
            return 0.0
        else:
            return value * adjustment_on_rest

    weights = [_good_weight(value, idx, needs_replacing, keep_empty,
                            each_missing_weight, adjustment_on_rest)
               for (idx, value) in enumerate(weights)]

    xsum = sum(weights)
    weights = [x / xsum for x in weights]

    return weights
def test_nansum_with_boolean(self):
    # gh-2978
    a = np.zeros(2, dtype=bool)
    try:
        np.nansum(a)
    except Exception:
        raise AssertionError()
def crunchy(eta, sec, hand=None, sigma=None):
    powers = []
    powers_norm = []
    y_axis = sec.get_y_axis()
    x_axis = sec.get_x_axis()
    if sigma is None:
        sigma = [np.absolute(y_axis[1] - y_axis[0]),
                 np.absolute(x_axis[1] - x_axis[0])]
    for yi in range(len(y_axis)):
        y = y_axis[yi]
        for xi in range(len(x_axis)):
            x = x_axis[xi]
            this_weight = weight_function(eta, x, y, sigma)
            if this_weight is None:
                powers.append(None)
                powers_norm.append(None)
            else:
                variance = 1 / this_weight
                powers.append(sec.get([yi, xi]) / variance)
                powers_norm.append(1 / variance)
    # Note: filter(None, ...) drops zero-valued entries as well as None entries.
    p = np.nansum(list(filter(None, powers)))
    pn = np.nansum(list(filter(None, powers_norm)))
    # print("eta: " + str(eta))
    # print(p)
    # print(pn)
    # print("p/pn: " + str(p/pn))
    return eta, p / pn
def logprob2dslope(p, x, y, x_err, y_err):
    m, scatter = p[0], p[1]
    if scatter < 0.0:
        return -np.inf
    sigma = (scatter + y_err**2 + m**2*x_err**2)
    lp = -0.5*np.nansum((y - m*x)**2/sigma) - 0.5*np.nansum(np.log(sigma))
    return lp
def crunchy2(pt_and_sigma, sec, hand=None):
    pt, sigma = pt_and_sigma
    py, px = pt
    powers = []
    powers_norm = []
    y_axis = sec.get_y_axis()
    x_axis = sec.get_x_axis()
    px_y = np.absolute(y_axis[1] - y_axis[0])
    px_x = np.absolute(x_axis[1] - x_axis[0])
    if sigma is None:
        sigma = [px_y, px_x]
    if sigma[0] < px_y:
        sigma = [px_y, sigma[1]]
    if sigma[1] < px_x:
        sigma = [sigma[0], px_x]
    for yi in range(len(y_axis)):
        y = y_axis[yi]
        for xi in range(len(x_axis)):
            x = x_axis[xi]
            this_weight = weight_function2(y, x, py, px, sigma)
            if this_weight is None:
                powers.append(None)
                powers_norm.append(None)
            else:
                variance = 1 / this_weight
                powers.append(sec.get([yi, xi]) / variance)
                powers_norm.append(1 / variance)
    p = np.nansum(list(filter(None, powers)))
    pn = np.nansum(list(filter(None, powers_norm)))
    return pt, p / pn
def crunchy3(offset, eta, sec, sigma=None):
    powers = []
    powers_norm = []
    y_axis = sec.get_y_axis()
    x_axis = sec.get_x_axis()
    px_y = np.absolute(y_axis[1] - y_axis[0])
    px_x = np.absolute(x_axis[1] - x_axis[0])
    if sigma is None:
        sigma = [px_y, px_x]
    if sigma[0] < px_y:
        sigma = [px_y, sigma[1]]
    if sigma[1] < px_x:
        sigma = [sigma[0], px_x]
    for yi in range(len(y_axis)):
        y = y_axis[yi]
        for xi in range(len(x_axis)):
            x = x_axis[xi]
            y_eff = y + eta * offset ** 2
            x_eff = x - offset
            this_weight = weight_function3(eta, y, x, y_eff, x_eff, sigma)
            if this_weight is None:
                powers.append(None)
                powers_norm.append(None)
            else:
                variance = 1 / this_weight
                powers.append(sec.get([yi, xi]) / variance)
                powers_norm.append(1 / variance)
    p = np.nansum(list(filter(None, powers)))
    pn = np.nansum(list(filter(None, powers_norm)))
    return offset, p / pn
def plothistory(self):
    a = self.a
    b = self.b
    plt.figure(figsize=(12, 6))
    I = np.concatenate([a.T, np.array(np.nansum(a[:, :3], 1), ndmin=2),
                        np.array(np.nansum(a[:, 3:6], 1), ndmin=2),
                        np.array(np.nansum(a[:, 6:], 1), ndmin=2)], axis=0)
    plt.plot(range(b.size), b, 'rx', ms=8, mew=2)
    plt.plot([10.5, 10.5], [-1, I.shape[1]], 'r', lw=2)
    plt.imshow(I, interpolation='nearest', cmap='winter')
    plt.colorbar()
    ax = plt.gca()
    ax.set_yticks(range(I.shape[0]))
    ax.set_yticklabels([''] * a.shape[0] + ['color', 'rel len', 'abs len'])
    c1 = plt.Circle((-1.5, 0), radius=0.4, color='blue', clip_on=False)
    c2 = plt.Circle((-1.5, 1), radius=0.4, color='white', clip_on=False)
    c3 = plt.Circle((-1.5, 2), radius=0.4, color='yellow', clip_on=False)
    ax.add_patch(c1); ax.add_patch(c2); ax.add_patch(c3)
    c1 = plt.Rectangle((-2, 3), 1, 0.2, color='white', clip_on=False)
    c2 = plt.Rectangle((-2.5, 4), 1.5, 0.2, color='white', clip_on=False)
    c3 = plt.Rectangle((-3, 5), 2, 0.2, color='white', clip_on=False)
    ax.add_patch(c1); ax.add_patch(c2); ax.add_patch(c3)
    c1 = plt.Rectangle((-2, 6), 1, 0.2, color='gray', clip_on=False)
    c2 = plt.Rectangle((-2.5, 7), 1.5, 0.2, color='gray', clip_on=False)
    c3 = plt.Rectangle((-3, 8), 2, 0.2, color='gray', clip_on=False)
    c4 = plt.Rectangle((-3.5, 9), 2.5, 0.2, color='gray', clip_on=False)
    ax.add_patch(c1); ax.add_patch(c2); ax.add_patch(c3); ax.add_patch(c4)
    print(I[-3, -1])
def exp1computeLL(self,dat,f): T=20 #initialize q=np.zeros(T+1); q[0]=self.q0 u=np.zeros(T+1); u[0]=self.u0 a=np.zeros((T+1,D));self.f=[] p=np.zeros((T+1,D)); a[0,:]=np.ones(10)/3.0 a[0,-1]=np.nan a[0,:3]*=q[0] a[0,3:6]*=(1-q[0])*u[0] a[0,6:]*=(1-q[0])*(1-u[0]) phase=0 LL=0 #print a[0,:] for t in range(T): if t>10: phase=1 else: phase=0 p[t,:]=getProb(a[t,:],self.d) m=data2a[dat[t],:,phase] w=np.power(a[t,:],self.m) loglik= np.nansum(np.log(np.maximum(0.001,p[t,m==f[t]]))) if f[t]==1: s=m*w a[t+1,:]= self.g*s/np.nansum(s) + (1-self.g)*a[t,:] else: s=(1-m)*w a[t+1,:]= self.h*s/np.nansum(s) + (1-self.h)*a[t,:] #print t,dat[t],f[t],np.nansum(p[t,m==f[t]]),loglik #print 'm= ',m #print 'p= ',p LL+=loglik return LL
def SWEmeltplot(data, beginyear, endyear):
    SWE = np.zeros(endyear + 1 - beginyear)
    melt = np.zeros(endyear + 1 - beginyear)
    years = np.arange(beginyear, endyear + 1)
    stationcount = []
    for k in range(len(SWE)):
        count = 0
        for i in range(len(data)):
            for j in range(len(data[i].monsum)):
                if (data[i].monsum[j].year == years[k]):
                    count += 1
                    SWE[k] = np.nansum(data[i].monsum[j].SWE) + SWE[k]
                    melt[k] = np.nansum(data[i].monsum[j].melt) + melt[k]
        SWE[k] = SWE[k] / count    # returns the mean SWE for all stations
        melt[k] = melt[k] / count  # returns the mean melt for all stations
        stationcount.append(count)
    plt.figure()
    plt.subplot(3, 1, 1)
    plt.plot(years, SWE)
    plt.title('SWE')
    plt.subplot(3, 1, 2)
    plt.plot(years, melt)
    plt.title('melt')
    plt.subplot(3, 1, 3)
    plt.bar(years, stationcount)
    return (SWE, melt)
def bic(em_fit_result_dict, LL_all):
    '''
    Compute the Bayesian Information Criterion score
    '''
    # Number of parameters:
    # - mixt_target: Tnum
    # - mixt_random: Tnum
    # - mixt_nontargets: Tnum
    # - alpha: 1
    # - beta: 1

    # First count the Loglikelihood
    bic_tot = -2. * np.nansum(LL_all[np.isfinite(LL_all)])

    # Then count alpha, beta appropriately
    K = 2
    bic_tot += K * np.log(np.nansum(np.isfinite(LL_all)))

    # Finally, the mixture proportions per condition
    for T_i, T in enumerate(em_fit_result_dict['T_space']):
        K = 2 + int(T > 1)
        bic_tot += K * np.log(np.nansum(np.isfinite(LL_all[T_i])))

    return bic_tot
def bic(em_fit_result_dict, LL_all):
    '''
    Compute the Bayesian Information Criterion score

    Split it, associating the parameters to the number of datapoints they
    really take care of.
    '''
    # Number of parameters:
    # - mixt_target_tr: 1
    # - mixt_random_tr: 1
    # - mixt_nontarget_trk: 1
    # - alpha: 1
    # - beta: 1
    # - gamma: 1

    # First count the Loglikelihood
    bic_tot = -2. * np.nansum(LL_all[np.tril_indices(LL_all.shape[0])])

    # Then count alpha, beta and gamma, for all datapoints appropriately
    K = 3
    bic_tot += K * np.log(np.nansum(np.isfinite(LL_all)))

    # Now do the mixture proportions per condition
    for nitems_i, nitems in enumerate(em_fit_result_dict['T_space']):
        for trecall_i, trecall in enumerate(em_fit_result_dict['T_space']):
            if trecall <= nitems:
                K = 3
                bic_tot += K * np.log(
                    np.nansum(np.isfinite(LL_all[nitems_i, trecall_i])))

    return bic_tot
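
# Both bic() variants above apply the standard definition
# BIC = -2*ln(L) + K*ln(N), splitting the K*ln(N) penalty across parameter
# groups according to how many datapoints each group actually touches.
# A minimal sketch of the plain, unsplit formula (not part of the original
# source), assuming per-datapoint log-likelihoods `LL_all` and a total
# parameter count `K`:
import numpy as np

def bic_plain(LL_all, K):
    """BIC from per-datapoint log-likelihoods; non-finite entries are ignored."""
    finite = np.isfinite(LL_all)
    n = finite.sum()                               # number of usable datapoints
    return -2. * np.nansum(LL_all[finite]) + K * np.log(n)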
def Q_factor(A, B):
    """Compute the "overlap" between the images A and B."""
    A_norm = np.nansum(A ** 2) ** 0.5
    B_norm = np.nansum(B ** 2) ** 0.5
    values = (A / A_norm) * (B / B_norm)
    return np.nansum(values)
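
# Q_factor is effectively a NaN-aware cosine similarity between the two
# images treated as flat vectors, so identical (or proportional) images give
# 1.0. A minimal usage sketch; the arrays are illustrative only:
import numpy as np

A = np.array([[1.0, 2.0], [3.0, np.nan]])
B = 2.0 * A                    # proportional image, NaN in the same place
print(Q_factor(A, A))          # -> 1.0
print(Q_factor(A, B))          # -> 1.0 (overall scaling cancels out)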
def test_average(comm):
    import warnings
    dataset = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))

    # unweighted
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        avg = dataset.average('mu')

        for var in dataset.variables:
            if var in dataset._fields_to_sum:
                x = numpy.nansum(dataset[var], axis=-1)
            else:
                x = numpy.nanmean(dataset[var], axis=-1)
            testing.assert_allclose(x, avg[var])

        # weighted
        weights = numpy.random.random(dataset.shape)
        dataset['weights'] = weights
        avg = dataset.average('mu', weights='weights')

        for var in dataset:
            if var in dataset._fields_to_sum:
                x = numpy.nansum(dataset[var], axis=-1)
            else:
                x = numpy.nansum(dataset[var] * dataset['weights'], axis=-1)
                x /= dataset['weights'].sum(axis=-1)
            testing.assert_allclose(x, avg[var])
def reportCreate(data, paramDict):
    report = copy.deepcopy(paramDict)
    # Order all Mod first, then all Org
    setKeys = sorted(data["DataSets"].keys())
    bestRes = ""
    start = 0
    end = len(setKeys)
    middle = end // 2
    i = start
    while i < end // 2:
        # Calculate Score
        modBs = np.array(data["DataSets"][setKeys[i]])
        obsBs = np.array(data["DataSets"][setKeys[middle]])
        modBsmean = np.nanmean(modBs)
        obsBsmean = np.nanmean(obsBs)
        obsBsMinModBs = obsBs - modBs
        obsBsMinMean = obsBs - obsBsmean
        SSres = np.nansum(obsBsMinModBs ** 2)
        SStot = np.nansum(obsBsMinMean ** 2)
        ResNorm = SSres ** 0.5
        if i == 0:
            bestRes = copy.copy(ResNorm)
        report[(setKeys[i] + "_RN")] = ResNorm  # Norm of residuals
        i = i + 1
        middle = middle + 1
    return report, bestRes
def bin_data(xs, ys, ivars, xps):
    """
    Bin data onto a uniform grid using medians.

    Args:
        `xs`: `[N, M]` array of xs
        `ys`: `[N, M]` array of ys
        `ivars`: `[N, M]` array of y-ivars
        `xps`: `M'` grid of x-primes for output template

    Returns:
        `yps`: `M'` grid of y-primes
    """
    all_ys, all_xs, all_ivars = np.ravel(ys), np.ravel(xs), np.ravel(ivars)
    dx = xps[1] - xps[0]  # ASSUMES UNIFORM GRID
    yps = np.zeros_like(xps)
    for i, t in enumerate(xps):
        ind = (all_xs >= t - dx/2.) & (all_xs < t + dx/2.)
        if np.sum(ind) > 0:
            # yps[i] = np.nanmedian(all_ys[ind])
            yps[i] = np.nansum(all_ys[ind] * all_ivars[ind]) / np.nansum(all_ivars[ind] + 1.)  # MAGIC
    ind_nan = np.isnan(yps)
    yps.flat[ind_nan] = np.interp(xps[ind_nan], xps[~ind_nan], yps[~ind_nan])
    return yps
def test_gradient(self):
    x = np.linspace(-np.pi, np.pi, 256)
    y = np.linspace(-np.pi, np.pi, 256).reshape(-1, 1)
    g = RegularGrid((0, 0, 2*np.pi/255, 2*np.pi/255, 0, 0),
                    values=np.sin(x)*np.cos(y))
    gx, gy = misc.gradient(g)
    self.assertTrue(np.nansum(np.abs(gx[:, :, 0] - np.cos(x)*np.cos(y))) < 7.0)
    self.assertTrue(np.nansum(np.abs(gy[:, :, 0] + np.sin(x)*np.sin(y))) < 7.0)
def compute_genetic_distance(X, **kwargs):
    """Given genotype matrix X, returns pairwise genetic distance between
    individuals using the estimator described in Theorem 1.

    Args:
        X: n * p matrix of 0/1/2/nan, n is #individuals, p is #SNPs
    """
    n, p = X.shape
    missing = np.isnan(X)
    col_sums = np.nansum(X, axis=0)
    col_counts = np.sum(~missing, axis=0)
    mu_hat = col_sums / 2. / col_counts   # p dimensional
    eta0_hat = np.nansum(X**2 - X, axis=0) / 2. / col_counts - mu_hat**2

    X_tmp = X / 2.
    X_tmp[missing] = 0
    non_missing = np.array(~missing, dtype=float)

    X_shifted = X_tmp - mu_hat
    gdm_squared = 2. * np.mean(eta0_hat) - 2. * np.dot(X_shifted, X_shifted.T) / np.dot(non_missing, non_missing.T)
    gdm_squared[np.diag_indices(n)] = 0.

    if len(gdm_squared[gdm_squared < 0]) > 0:
        # shift all entries by the smallest amount that makes them non-negative
        shift = - np.min(gdm_squared[gdm_squared < 0])
        gdm_squared += shift
        gdm_squared[np.diag_indices(n)] = 0.

    gdm = np.sqrt(np.maximum(gdm_squared, 0.))

    return gdm
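
# A minimal usage sketch for compute_genetic_distance; the toy genotype
# matrix below is illustrative only. Rows are individuals, columns are SNPs
# coded 0/1/2, with np.nan marking missing calls.
import numpy as np

X = np.array([[0., 1., 2., np.nan],
              [1., 1., 2., 0.],
              [2., np.nan, 0., 0.]])
gdm = compute_genetic_distance(X)
print(gdm.shape)               # (3, 3): symmetric distance matrix, zero diagonal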
def precipincloudTF(CL,abovesize=100): """ This method tells whether precipitation in cloud (horicloud + verticloud) was observed during the entire cloud period: true or false. Default value abovesize=100 for precipitation >100 microns. Noise level at 1e-2.""" # Do not use to count the total amount of rain detected as some scans may be counted more than once. pos=[i for i,x in enumerate(CL.sd) if 'p1' in x["sdtype"].lower()] # finding P1 size dist if len(pos)==0: pos=[i for i,x in enumerate(CL.sd) if 'p' in x["sdtype"].lower()] # finding P size dist if len(pos)==1: pos=int(pos[0]) elif len(pos)>1: for p in pos: print("%d - %s" % (p, CL.sd[p]["Distname"])) pos=int(raw_input("What is the prefered precipitation distribution? Enter the number.")) else: pos=nan if type(pos)==float: if isnan(pos): precb=nan elif type(CL.sd[pos]["time"])==float: precb=nan; elif np.shape(CL.sd[pos]["time"])[0]<=1: precb=nan else: Pas=samac.dNdlogDp2N(CL.sd[pos],abovesize,nan) #Pas=np.ma.masked_less_equal(Pas,1e-2); # nansum doesn't seem to handle masked arrays. Pas[Pas<=1e-2]=0; t100=0; for b in range(np.shape(CL.times['horicloud'])[0]): if len(Pas[(CL.sd[pos]["time"]<=CL.times['horicloud'][b][1])*(CL.sd[pos]["time"]>=CL.times['horicloud'][b][0])])==0: pass else: t100=t100+np.nansum(Pas[(CL.sd[pos]["time"]<=CL.times['horicloud'][b][1])*(CL.sd[pos]["time"]>=CL.times['horicloud'][b][0])]) for b in range(np.shape(CL.times['verticloud'])[0]): if len(Pas[(CL.sd[pos]["time"]<=CL.times['verticloud'][b][1])*(CL.sd[pos]["time"]>=CL.times['verticloud'][b][0])])==0: pass else: t100=t100+np.nansum(Pas[(CL.sd[pos]["time"]<=CL.times['verticloud'][b][1])*(CL.sd[pos]["time"]>=CL.times['verticloud'][b][0])]) if t100>0: precb=True else: precb=False return precb
def fuzzyKmeans(samples, fixCenter=None, iter=5, fuzzParam=1.5):
    # Not actually k-means yet, just 3-means
    if fixCenter is not None:
        dMeans = [min(samples) + 0.01, fixCenter, max(samples) - 0.01]
    else:
        dMeans = [min(samples) + 0.01, numpy.mean(samples), max(samples) - 0.01]
    # membership degrees (Python 2's map(None, ...) replaced with plain lists)
    begDeg = [0.0] * len(samples)
    midDeg = [0.0] * len(samples)
    endDeg = [0.0] * len(samples)
    for j in range(iter):
        for k in range(len(samples)):
            pBeg = (1.0/(samples[k] - dMeans[2])**2)**(1.0/(fuzzParam - 1))
            pMid = (1.0/(samples[k] - dMeans[1])**2)**(1.0/(fuzzParam - 1))
            pEnd = (1.0/(samples[k] - dMeans[0])**2)**(1.0/(fuzzParam - 1))
            nmlz = pBeg + pMid + pEnd
            begDeg[k] = pBeg/nmlz
            midDeg[k] = pMid/nmlz
            endDeg[k] = pEnd/nmlz
        # Update means 0 and 2, the other should stay fixed!
        # (Change this for general purpose k-means)
        dMeans[0] = numpy.nansum((numpy.array(endDeg)**fuzzParam)*numpy.array(samples))/numpy.nansum(numpy.array(endDeg)**fuzzParam)
        if fixCenter is None:
            dMeans[1] = numpy.nansum((numpy.array(midDeg)**fuzzParam)*numpy.array(samples))/numpy.nansum(numpy.array(midDeg)**fuzzParam)
        dMeans[2] = numpy.nansum((numpy.array(begDeg)**fuzzParam)*numpy.array(samples))/numpy.nansum(numpy.array(begDeg)**fuzzParam)
    return dMeans
def nansum(self, axis=None, dtype=None, out=None):
    return UncertainQuantity(
        np.nansum(self.magnitude, axis, dtype, out),
        self.dimensionality,
        (np.nansum(self.uncertainty.magnitude**2, axis))**0.5,
        copy=False
    )
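
# The nansum() method above propagates the uncertainty of the summed
# quantity in quadrature, i.e. sigma_sum = sqrt(sum(sigma_i**2)), the usual
# rule for independent errors. A small numeric sketch of that rule with
# plain numpy (illustrative values only):
import numpy as np

values = np.array([1.0, 2.0, np.nan, 4.0])
sigmas = np.array([0.1, 0.2, 0.0, 0.4])
total = np.nansum(values)                        # 7.0
total_sigma = np.sqrt(np.nansum(sigmas**2))      # sqrt(0.21) ~= 0.458
print(total, total_sigma)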
def effrad(CL,inst,bindist='lin'): """ This method returns the effective radius for a given instrument for the entire cloud period. The radius is in the same units as the instrument's units (usually micrometers). Note that one might get the effective diameter if the instrument's size bins are diameters. example: CloudObj.effrad(inst='FSSP96',bindist='lin') bindist is 'lin' if the difference between bins is linearly distributed (FSSPs) and 'log' if they are logarythmically distributed (PCASP)""" # according to the formula in https://en.wikipedia.org/wiki/Cloud_drop_effective_radius latest access on Oct 2013. [pos,sd]=[[i,sd] for i,sd in enumerate(CL.sd) if sd["Distname"].lower() == inst.lower()][0] # building dr (dradius) vector R=sd["bins"]; t=len(R) b=np.zeros([t]); h=np.zeros([t]); if bindist=='lin': for i in range(1,t): b[i]=(R[i-1]+R[i])/2.; for i in range(0,t-1): h[i]=(R[i+1]+R[i])/2.; b[0]=R[0]-(R[1]-h[0]); h[t-1]=R[t-1]+(b[t-1]-R[t-2]); dR=h-b; elif bindist=='log': for i in range(1,t): b[i]=10**((np.log10(R[i-1])+np.log10(R[i]))/2.); for i in range(0,t-1): h[i]=10**((np.log10(R[i+1])+np.log10(R[i]))/2.); b[0]=10**(np.log10(R[0])+(log10(R[1])-log10(h[1]))); h[t-1]=10**(np.log10(R[t-1])-(log10(b[t-2])-np.log10(R[t-2]))); dR=h-b; else: print("[effrad] bindist option entry is neither 'lin' or 'log'.") # calculating the effective radius ER=np.nansum((sd["bins"]**3 *dR) * sd["data"].transpose(),axis=1)/np.nansum((sd["bins"]**2 *dR) * sd["data"].transpose(),axis=1) return ER
def LinearSolveAll(): Dir=os.getcwd(); DataDir=Dir + '/DataFormatted/'; Locations=np.genfromtxt(DataDir+'SeismicLocations.csv'); Locations[:,0]=Locations[:,0]-360; Density=np.genfromtxt(DataDir+'DenseAll.csv'); Qs=np.genfromtxt(DataDir+'LongLatSurfaceHeat.csv',skip_header=1,delimiter=','); Qm=np.genfromtxt(DataDir+'MantleHeat.txt',skip_header=1,delimiter=','); QsInterp=Nearest2D(Qs[:,0:2],Qs[:,2]); QmInterp=Nearest2D(Qm[:,0:2],Qm[:,2]); Avocado=6.022e23; # mols to atoms conversion qs=QsInterp(Locations[:,0:2])/1000; qm=QmInterp(Locations[:,0:2])/1000; #Density[Density>3.1]=np.nan; Fels=(3-Density)/(0.3); Fels[Density<2.7]=1; Fels[Density>3]=0; years=365.24*24*60*60;#years to seconds conversion Depth=np.genfromtxt(DataDir+'Depth.csv'); dz=(Depth[1]-Depth[0])*1000; UContentU=2.8e-6/238; #upper crust uranium mass fraction ThContentU=UContentU*3.8/232; #upper crust thorium mass fraction K40ContentU=2*120e-6*3.4e-2/94; #upper crust thorium mass fraction UContentL=0.2e-6/238; #mol/g of each cell ThContentL=1.2e-6/232; K40ContentL=2*120e-6*0.6e-2/94; alpha238=7.41e-12;#Joules/decay alpha235=7.24e-12;#Joules/decay alpha232=6.24e-12;#Joules/decay beta=1.14e-13; #Joules/decay LamU238 = np.log(2)/(4.468*1e9);#% decay rate of U in years LamTh232 = np.log(2)/(1.405e10); # decay rate of Th in years LamU235 = np.log(2)/(703800000); #decay rate of 235U in years LamK40=np.log(2)/1.248e9;#decay rate of K40 in years UraniumHeatL=alpha238*Avocado*UContentL*LamU238/years+alpha235*Avocado*UContentL*LamU235/years/137.88; ThoriumHeatL=alpha232*Avocado*ThContentL*LamTh232/years; KHeatL=beta*Avocado*K40ContentL*LamK40/years; TotalHeatL=UraniumHeatL+ThoriumHeatL+KHeatL; # W/gram UraniumHeatU=alpha238*Avocado*UContentU*LamU238/years+alpha235*Avocado*UContentU*LamU235/years/137.88; ThoriumHeatU=alpha232*Avocado*ThContentU*LamTh232/years; KHeatU=beta*Avocado*K40ContentU*LamK40/years; qc=qs-qm; FluxL=np.nansum((1-Fels)*TotalHeatL*dz*Density*1e6,0); TotalHeatU=(qc-FluxL)/np.nansum(Fels*Density*1e6*dz,0); print(TotalHeatL) print(dz) plt.close('all') return qc*1e3 #return in W/g
def orientation_numpy(normals, weights):
    # Project the normals against the plane
    dx, dy, dz = np.rollaxis(normals, 2)

    # Use the quadruple angle formula to push everything around the
    # circle 4 times faster, like doing mod(x, pi/2)
    qz = 4 * dz * dx * dx * dx - 4 * dz * dz * dz * dx
    qx = dx * dx * dx * dx - 6 * dx * dx * dz * dz + dz * dz * dz * dz

    # Build the weights using a threshold, finding the normals lying on
    # the XZ plane
    d = 0.3
    global cx, qqx, qqz
    cx = np.max((1.0 - dy * dy / (d * d), 0 * dy), 0)
    w = weights * cx

    qqx = np.nansum(w * qx) / w.sum()
    qqz = np.nansum(w * qz) / w.sum()

    angle = np.arctan2(qqz, qqx) / 4

    q0 = np.array([np.cos(angle), 0, np.sin(angle)])
    q0 /= np.sqrt(np.dot(q0, q0))
    q2 = np.cross(q0, np.array([0, 1, 0]))

    # Build an output matrix out of the components
    mat = np.vstack((q0, np.array([0, 1, 0]), q2))
    axes = expmap.rot2axis(mat)
    return axes
def calculate_avg():  # DONE
    global data_ratios_avg
    global data_ratios_std

    # remove nan values of the weights
    weights_nan = np.zeros((nb_rows, 1))
    weights_nan_sq = np.zeros((nb_rows, 1))
    nb_files = np.ones((nb_rows, 1)) * len(args.xvgfilenames)
    tmp_weights_nan = np.zeros((nb_rows, len(args.xvgfilenames)))
    for r in range(0, nb_rows):
        tmp_weights_nan[r, :] = weights
        for f_index in range(0, len(args.xvgfilenames)):
            if np.isnan(data_ratios[r, f_index]):
                tmp_weights_nan[r, f_index] = 0
                nb_files[r, 0] -= 1
    weights_nan[:, 0] = np.nansum(tmp_weights_nan, axis=1)
    weights_nan_sq[:, 0] = np.nansum(tmp_weights_nan**2, axis=1)
    weights_nan[weights_nan == 0] = 1

    # avg
    data_ratios_avg = np.zeros((nb_rows, 1))
    data_ratios_avg[:, 0] = scipy.stats.nanmean(data_ratios * weights * nb_files / weights_nan, axis=1)

    # std
    tmp_std = np.zeros((nb_rows, 1))
    tmp_std[:, 0] = np.nansum(weights * (data_ratios - data_ratios_avg[:, 0:1])**2, axis=1)
    tmp_div = np.copy((weights_nan)**2 - weights_nan_sq)
    tmp_div[tmp_div == 0] = 1
    data_ratios_std = np.sqrt(weights_nan / tmp_div * tmp_std)

    return
def likelihood(self, a=None, b=None, s=None):
    r"""
    \sum_{i,j} [w_{ij}[y_{i,j} s_j(a_i + b_j) - log(1 + exp(s_j(a_i + b_j)))]
    """
    if ((a is None) and (b is None) and (s is None)):
        a = np.array(list(self.a_est.values()))
        b = np.array(list(self.b_est.values()))
        if self.model == '2PL':
            s = np.array(list(self.s_est.values()))

    c = a[self.obser['index_user']] + b[self.obser['index_item']]
    if (self.model == '2PL') and (s is not None):
        c = s[self.obser['index_item']] * c
    pos = self.data[self.response].values > 0

    # account for weights
    w = 1.0
    if self.wts is not None:
        w = _fc(self.data[self.wts])
        first_term = np.nansum(w[pos] * c[pos])
    else:
        first_term = np.nansum(c[pos])
    second_term = np.nansum(w * np.log(1 + np.exp(c)))

    return (first_term - second_term
            - self.alpha * np.sum(a * a)
            - self.alpha * np.sum(b * b))
def integrate(self, frequencies=None, radius=2.7, nooffset=False, azel='az'):
    """
    Given a radius calculate beam integral inside the radius
    and also the total integral
    """
    if frequencies is None:
        frequencies = self.cfg['synth']['freq']
    lisdic = []
    for i, freq in enumerate(frequencies):
        if freq in self.cfg['synth']['freq']:
            dic = {}
            dic['frequency'] = freq
            if azel in ('az', 'el'):
                find = self.cfg['synth']['freq'].index(freq)*2 + 1
            else:
                find = self.cfg['synth']['freq'].index(freq)*2 + 2
            if not nooffset:
                ydata = numpy.sqrt(self.data[:, find]**2 - self.offset**2)
            else:
                ydata = self.data[:, find]
            if azel in ('az', 'el'):
                xdata = self.data[:, 0]
            else:
                xdata = numpy.sqrt(self.data[:, 0]**2 + self.data[:, 1]**2)
                ind = numpy.where(self.data[:, 0] < 0)
                xdata[ind] = -xdata[ind]
            idx = numpy.where(numpy.abs(xdata) <= radius)
            dic['inner'] = numpy.nansum(ydata[idx])
            dic['all'] = numpy.nansum(ydata)
            lisdic.append(dic)
            print(freq, dic['inner'], dic['all'])
    return pd.DataFrame(lisdic)
def autocorr_test(_xdata, _ydata):
    import numpy as np
    import pandas as pd
    from statsmodels.stats.diagnostic import acorr_ljungbox
    from statsmodels.tsa.stattools import acf
    # all stats need regularly spaced, continuous time series - just y variable

    # Durbin-Watson statistic:
    #   calculated correctly with missing data
    #   but no significance level. Apparently critical values for DW are not
    #   implemented in any python library
    # ACF:
    #   crashes on missing data
    # Ljung-Box:
    #   crashes on missing data too

    _ydata = np.ma.masked_invalid(_ydata)

    # autocorrelation in residuals
    # this is the acf function that does not allow nans
    # print("\nautocorrelation for first three lags:", acf(_ydata)[1:4])
    # this is from pandas, is nan agnostic
    pdf = pd.Series(_ydata, index=_xdata, copy=True)
    print("autocorrelation for first three lags:",
          [pdf.autocorr(i) for i in range(1, 4)])

    # durbin-watson
    a = _ydata[:-1].astype('float')
    b = _ydata[1:].astype('float')
    _stat = np.nansum((b - a)**2) / np.nansum(_ydata**2)
    print("Durbin-Watson statistic (close to 2 if no autocorrelation):", _stat)

    _stat, _pvalue = acorr_ljungbox(_ydata, lags=1, boxpierce=False)
    print("Ljung-Box p-value on lag 1 autocorrelation:", _pvalue)
    print("")
def _detect_outliers_core(self, imgfile, motionfile, runidx, cwd=None): """ Core routine for detecting outliers """ if not cwd: cwd = os.getcwd() # read in functional image if isinstance(imgfile, str): nim = load(imgfile) elif isinstance(imgfile, list): if len(imgfile) == 1: nim = load(imgfile[0]) else: images = [load(f) for f in imgfile] nim = funcs.concat_images(images) # compute global intensity signal (x, y, z, timepoints) = nim.get_shape() data = nim.get_data() affine = nim.get_affine() g = np.zeros((timepoints, 1)) masktype = self.inputs.mask_type if masktype == 'spm_global': # spm_global like calculation iflogger.debug('art: using spm global') intersect_mask = self.inputs.intersect_mask if intersect_mask: mask = np.ones((x, y, z), dtype=bool) for t0 in range(timepoints): vol = data[:, :, :, t0] # Use an SPM like approach mask_tmp = vol > \ (_nanmean(vol) / self.inputs.global_threshold) mask = mask * mask_tmp for t0 in range(timepoints): vol = data[:, :, :, t0] g[t0] = _nanmean(vol[mask]) if len(find_indices(mask)) < (np.prod((x, y, z)) / 10): intersect_mask = False g = np.zeros((timepoints, 1)) if not intersect_mask: iflogger.info('not intersect_mask is True') mask = np.zeros((x, y, z, timepoints)) for t0 in range(timepoints): vol = data[:, :, :, t0] mask_tmp = vol > \ (_nanmean(vol) / self.inputs.global_threshold) mask[:, :, :, t0] = mask_tmp g[t0] = np.nansum(vol * mask_tmp)/np.nansum(mask_tmp) elif masktype == 'file': # uses a mask image to determine intensity maskimg = load(self.inputs.mask_file) mask = maskimg.get_data() affine = maskimg.get_affine() mask = mask > 0.5 for t0 in range(timepoints): vol = data[:, :, :, t0] g[t0] = _nanmean(vol[mask]) elif masktype == 'thresh': # uses a fixed signal threshold for t0 in range(timepoints): vol = data[:, :, :, t0] mask = vol > self.inputs.mask_threshold g[t0] = _nanmean(vol[mask]) else: mask = np.ones((x, y, z)) g = _nanmean(data[mask > 0, :], 1) # compute normalized intensity values gz = signal.detrend(g, axis=0) # detrend the signal if self.inputs.use_differences[1]: gz = np.concatenate((np.zeros((1, 1)), np.diff(gz, n=1, axis=0)), axis=0) gz = (gz - np.mean(gz)) / np.std(gz) # normalize the detrended signal iidx = find_indices(abs(gz) > self.inputs.zintensity_threshold) # read in motion parameters mc_in = np.loadtxt(motionfile) mc = deepcopy(mc_in) (artifactfile, intensityfile, statsfile, normfile, plotfile, displacementfile, maskfile) = self._get_output_filenames(imgfile, cwd) mask_img = Nifti1Image(mask.astype(np.uint8), affine) mask_img.to_filename(maskfile) if self.inputs.use_norm: brain_pts = None if self.inputs.bound_by_brainmask: voxel_coords = np.nonzero(mask) coords = np.vstack((voxel_coords[0], np.vstack((voxel_coords[1], voxel_coords[2])))).T brain_pts = np.dot(affine, np.hstack((coords, np.ones((coords.shape[0], 1)))).T) # calculate the norm of the motion parameters normval, displacement = _calc_norm(mc, self.inputs.use_differences[0], self.inputs.parameter_source, brain_pts=brain_pts) tidx = find_indices(normval > self.inputs.norm_threshold) ridx = find_indices(normval < 0) if displacement is not None: dmap = np.zeros((x, y, z, timepoints), dtype=np.float) for i in range(timepoints): dmap[voxel_coords[0], voxel_coords[1], voxel_coords[2], i] = displacement[i, :] dimg = Nifti1Image(dmap, affine) dimg.to_filename(displacementfile) else: if self.inputs.use_differences[0]: mc = np.concatenate((np.zeros((1, 6)), np.diff(mc_in, n=1, axis=0)), axis=0) traval = mc[:, 0:3] # translation parameters (mm) rotval = mc[:, 3:6] # 
rotation parameters (rad) tidx = find_indices(np.sum(abs(traval) > self.inputs.translation_threshold, 1) > 0) ridx = find_indices(np.sum(abs(rotval) > self.inputs.rotation_threshold, 1) > 0) outliers = np.unique(np.union1d(iidx, np.union1d(tidx, ridx))) # write output to outputfile np.savetxt(artifactfile, outliers, fmt='%d', delimiter=' ') np.savetxt(intensityfile, g, fmt='%.2f', delimiter=' ') if self.inputs.use_norm: np.savetxt(normfile, normval, fmt='%.4f', delimiter=' ') if isdefined(self.inputs.save_plot) and self.inputs.save_plot: import matplotlib matplotlib.use(config.get("execution", "matplotlib_backend")) import matplotlib.pyplot as plt fig = plt.figure() if isdefined(self.inputs.use_norm) and self.inputs.use_norm: plt.subplot(211) else: plt.subplot(311) self._plot_outliers_with_wave(gz, iidx, 'Intensity') if isdefined(self.inputs.use_norm) and self.inputs.use_norm: plt.subplot(212) self._plot_outliers_with_wave(normval, np.union1d(tidx, ridx), 'Norm (mm)') else: diff = '' if self.inputs.use_differences[0]: diff = 'diff' plt.subplot(312) self._plot_outliers_with_wave(traval, tidx, 'Translation (mm)' + diff) plt.subplot(313) self._plot_outliers_with_wave(rotval, ridx, 'Rotation (rad)' + diff) plt.savefig(plotfile) plt.close(fig) motion_outliers = np.union1d(tidx, ridx) stats = [{'motion_file': motionfile, 'functional_file': imgfile}, {'common_outliers': len(np.intersect1d(iidx, motion_outliers)), 'intensity_outliers': len(np.setdiff1d(iidx, motion_outliers)), 'motion_outliers': len(np.setdiff1d(motion_outliers, iidx)), }, {'motion': [{'using differences': self.inputs.use_differences[0]}, {'mean': np.mean(mc_in, axis=0).tolist(), 'min': np.min(mc_in, axis=0).tolist(), 'max': np.max(mc_in, axis=0).tolist(), 'std': np.std(mc_in, axis=0).tolist()}, ]}, {'intensity': [{'using differences': self.inputs.use_differences[1]}, {'mean': np.mean(gz, axis=0).tolist(), 'min': np.min(gz, axis=0).tolist(), 'max': np.max(gz, axis=0).tolist(), 'std': np.std(gz, axis=0).tolist()}, ]}, ] if self.inputs.use_norm: stats.insert(3, {'motion_norm': {'mean': np.mean(normval, axis=0).tolist(), 'min': np.min(normval, axis=0).tolist(), 'max': np.max(normval, axis=0).tolist(), 'std': np.std(normval, axis=0).tolist(), }}) save_json(statsfile, stats)
def nsum(x):
    return np.nansum(x)
def _CovarianceGeneration(args): ReturnDTs = pd.Series(args["FT"].getDateTime()) args["FT"].start() iReturn = None iReturnDTs = [] SampleFilterFactors = [] SampleFilterStr = args["CovESTArgs"]["有效样本条件"] FactorNames = args["FT"].FactorNames.copy() FactorNames.sort(key=len, reverse=True) for iFactor in FactorNames: if SampleFilterStr.find('@' + iFactor) != -1: SampleFilterFactors.append(iFactor) SampleFilterStr = SampleFilterStr.replace( '@' + iFactor, 'iData[\'' + iFactor + '\']') if args["ModelArgs"]['运行模式'] == '串行': # 运行模式为串行 ProgBar = ProgressBar(max_value=len(args["RiskESTDTs"])) ProgBar.start() else: ProgBar = None for i, iDT in enumerate(args["RiskESTDTs"]): iInd = (ReturnDTs <= iDT).sum() - 1 if iInd < args["CovESTArgs"]["样本长度"] - 1: # 样本不足, 跳过 if ProgBar is not None: ProgBar.update(i + 1) else: args['Sub2MainQueue'].put((args["PID"], 1, None)) continue args["FT"].move(iDT) iIDs = args["FT"].getID(idt=iDT, is_filtered=True) iLastDTs = iReturnDTs iReturnDTs = list(ReturnDTs.iloc[iInd - args["CovESTArgs"]["样本长度"] + 1:iInd + 1]) iNewDTs = sorted(set(iReturnDTs).difference(set(iLastDTs))) if iReturn is not None: iReturn = pd.concat([ iReturn, args["FT"].readData(factor_names=[args["ModelArgs"]["收益率因子"]], dts=iNewDTs) ]).iloc[0].loc[iReturnDTs, :] for jFactor in SampleFilterFactors: iData[jFactor] = pd.concat([ iData[jFactor], args["FT"].readData(factor_names=[jFactor], dts=iNewDTs) ]).iloc[0].loc[iReturnDTs, :] else: iReturn = args["FT"].readData( factor_names=[args["ModelArgs"]["收益率因子"]], dts=iNewDTs).iloc[0].loc[iReturnDTs, :] iData = {} for jFactor in SampleFilterFactors: iData[jFactor] = args["FT"].readData( factor_names=[jFactor], dts=iNewDTs).iloc[0].loc[iReturnDTs, :] iMask = eval(SampleFilterStr) iReturn[~iMask] = np.nan iReturnArray = iReturn.loc[:, iIDs].values iSampleCov = RiskModelFun.estimateSampleCovMatrix_EWMA( iReturnArray, forcast_num=1, half_life=args["CovESTArgs"]["半衰期"]) iAvgCorr = RiskModelFun.calcAvgCorr(iSampleCov) iShrinkageTarget = (np.ones(iSampleCov.shape) - np.eye( iSampleCov.shape[0])) * iAvgCorr + np.eye(iSampleCov.shape[0]) iVol = np.diag(iSampleCov)**0.5 iShrinkageTarget = (iShrinkageTarget * iVol).T * iVol iPiMatrix = estimate_pi(iReturnArray, iSampleCov) pi = np.nansum(iPiMatrix) rho = estimate_rho(iReturnArray, iSampleCov, iPiMatrix, iAvgCorr) gamma = estimate_gamma(iSampleCov, iShrinkageTarget) kappa = (pi - rho) / gamma T = iReturnArray.shape[0] delta = max((0, min((kappa / T, 1)))) iCov = (delta * iShrinkageTarget + (1 - delta) * iSampleCov) * args["CovESTArgs"]["预测期数"] iCov = pd.DataFrame(iCov, index=iIDs, columns=iIDs) args["RiskDB"].writeData(args["TargetTable"], iDT, cov=iCov) if ProgBar is not None: ProgBar.update(i + 1) else: args['Sub2MainQueue'].put((args["PID"], 1, None)) if ProgBar is not None: ProgBar.finish() args["FT"].end() return 0
def estimate_gamma(sample_cov, shrinkage_target):
    return np.nansum((shrinkage_target - sample_cov)**2)
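
# estimate_gamma() is the squared Frobenius distance between the shrinkage
# target and the sample covariance (with NaN entries ignored). A quick check
# of that equivalence for NaN-free input; the matrices are illustrative only:
import numpy as np

S = np.array([[1.0, 0.2], [0.2, 2.0]])   # sample covariance
F = np.array([[1.0, 0.5], [0.5, 2.0]])   # shrinkage target
assert np.isclose(estimate_gamma(S, F), np.linalg.norm(F - S, 'fro')**2)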
# thickness of zDBc (=dzDB) (with first thickness from surf to first bin) dzDBc = np.vstack( [utils_conv.exp_ji_to_kji(zDBc[0], 1), np.diff(zDBc, axis=0)]) # ----------------------------------------------------------------------------- # PART II // TRANSPORTS # ----------------------------------------------------------------------------- # calculate transports MV = utils_transp.calc_MV(ncdat).values # = V * DX *DZ MVf = utils_transp.calc_MVflat(ncdat).values # = V * DX # ------------------------------------------------------------------------- # - MOC Streamfunction (in depth space) (in Sv) MVxint_mgrd = np.nansum(MV, axis=2) MOC_mgrd_V = utils_ana.nancumsum(MVxint_mgrd, axis=0) # ------------------------------------------------------------------------- # - dMOC Streamfunction (in density space) (in Sv) (M1) # (1) integration of MVfp along density axis weighting with dDB or dzDB # (2) projection on auxilary grid # (3) zonal integration of dMVfc(p) ''' dMVf = utils_conv.resample_colwise(MVf, ncdat.z_t.values, zDBc, method='dMV', fill_value=np.nan, mask = ATLboolmask, mono_method='force') dMVfc = utils_ana.nancumsum(dMVf*dzDBc, axis=0) #! changed dDB to dzDBc dMVfcp = utils_conv.project_on_auxgrd(dMVfc, ncdat.ANGLE.values) dMOC = np.nansum(dMVfcp, axis=-1) ''' dMVf = utils_conv.resample_colwise(MVf,
__GLOBALS.update({ "normalvariate": random.normalvariate, "gauss": random.gauss, "expovariate": random.expovariate, "gammavariate": random.gammavariate, "betavariate": random.betavariate, "lognormvariate": random.lognormvariate, "paretovariate": random.paretovariate, "vonmisesvariate": random.vonmisesvariate, "weibullvariate": random.weibullvariate, "triangular": random.triangular, "uniform": random.uniform, "nanmean": lambda *args: np.nanmean(args), "nanmin": lambda *args: np.nanmin(args), "nanmax": lambda *args: np.nanmax(args), "nansum": lambda *args: np.nansum(args), "nanstd": lambda *args: np.nanstd(args), "nanmedian": lambda *args: np.nanmedian(args), "nancumsum": lambda *args: np.nancumsum(args), "nancumprod": lambda *args: np.nancumprod(args), "nanargmax": lambda *args: np.nanargmax(args), "nanargmin": lambda *args: np.nanargmin(args), "nanvar": lambda *args: np.nanvar(args), "mean": lambda *args: np.mean(args), "min": lambda *args: np.min(args), "max": lambda *args: np.max(args), "sum": lambda *args: np.sum(args), "std": lambda *args: np.std(args), "median": lambda *args: np.median(args), "cumsum": lambda *args: np.cumsum(args), "cumprod": lambda *args: np.cumprod(args),
def _fit(self, params, rr=False): """ Estimate the process model uncertainty Q via optimisation """ """ Make evolution uncertainty matrix Q [7*3, 7*3] -- These are estimate by MLE """ qiso = np.exp( params) * self.band_uncs #np.exp(params) # params to estimate qq = np.array([[i, i * 1e-1, i * 1e-1] for i in qiso]).flatten() Q = np.diag(qq) """ set-up """ # also store log at each step logL = [] II = np.eye(21) x = [] cov = [] xt = self.x0.flatten() covT = self.Cov0 L = [] ys = [] ress = [] res = 0 Hk = self.Hk invS = self.invS S = self.S """ Run """ for t in self.Timesteps: """ predict """ if qa[t]: """ use fcc to improve convergence """ cov0 = np.copy(covT) x0 = np.copy(xt) xt = x0 covT = cov0 + Q # Residuals for b in xrange(7): Hk[3 * b:(3 * b + 3), b] = self.H[t] # supposedly faster #np.place(Hk, Hk>0, H[t]) y = self.refl[t] - self.Hk.T.dot(xt) ys.append(y) # Innovation covariance S = self.Ro + Hk.T.dot(covT).dot(Hk) """ Add the residual check from the DA tutorial one """ np.fill_diagonal(invS, 1 / np.diagonal(S)) #invS = np.linalg.inv(S) K = covT.dot(Hk).dot(invS) # Updated posterior estimate xt = xt + K.dot(y) # Updated posterior covariance covT = (II - K.dot(Hk.T)).dot(covT) # better formula #covT = covT - K.dot(Hk).dot(covT) - covT.dot(Hk.T).dot(K.T) + K.dot(S).dot(K.T) """ And compute the new residual """ e = self.refl[t] - Hk.T.dot(xt) """ supposedly faster inv -- doesn't work """ #inv_M = np.linalg.inv(covT) choleskey, _ = scipy.linalg.lapack.dpotrf(covT, False, False) inv_M, info = scipy.linalg.lapack.dpotri(choleskey) if info != 0: inv_M = np.linalg.inv(covT) """ try and reduce calculations by exploiting block diag structure """ #H_ = Hk[:3, 0] #bx = [H_.T.dot(inv_M[3*b:(3*b+3), 3*b:(3*b+3)]).dot(H_) for b in xrange(7)] #bdet = [det3(covT[3*b:(3*b+3), 3*b:(3*b+3)]) for b in xrange(7)] #logT = np.log(np.prod(bdet))+ (e**2*bx).sum() """ Fast log det from choleskey of the cov matrix """ bdet = np.sum(2 * np.log(np.diagonal(choleskey))) logT = bdet + y.T.dot(Hk.T.dot(inv_M).dot(Hk)).dot(y) #logT = np.log(np.linalg.det(covT)) + y.T.dot(Hk.T.dot(inv_M).dot(Hk)).dot(y) logL.append(logT) x.append(np.array(xt).flatten()) cov.append(covT) else: """ No observations So just use normal model """ ress.append(np.zeros(7)) xt = xt covT = covT + Q ys.append(0) x.append(np.array(xt)) cov.append(covT) """ put together """ x = np.array(x) cov = np.array(cov) ress = np.array(ress) logL = np.array(logL) m = logL > 0 Likelihood = -np.nansum(np.array(logL).flatten()) if rr: return Likelihood, x, logL else: return Likelihood #, x, logL
def _obs_cost_test(self, p, is_full = True, do_unc=False): p = np.array(p).reshape(2, -1) X = self.control_variables.reshape(self.boa.shape[0], 7, -1) X[:, 3:5, :] = np.array(p) xap_H, xbp_H, xcp_H = [], [], [] xap_dH, xbp_dH, xcp_dH = [], [], [] emus = list(self.xap_emus) + list(self.xbp_emus) + list(self.xcp_emus) Xs = list(X) + list(X) + list(X) inps = list(zip(emus, Xs)) # if self._coarse_mask.sum() > 1000: # ret = np.array(parmap(self._helper, inps)) # else: ret = np.array(list(map(self._helper, inps))) xap_H, xbp_H, xcp_H = ret[:, :, 0] .reshape(3, self.boa.shape[0], len(self.Hx)) xap_dH, xbp_dH, xcp_dH = ret[:, :, 1:].reshape(3, self.boa.shape[0], len(self.Hx), 2) y = xap_H * self.toa - xbp_H sur_ref = y / (1 + xcp_H * y) diff = sur_ref - self.boa full_J = np.nansum(0.5 * self.band_weights[...,None] * (diff)**2 / self.boa_unc**2, axis=0) J = np.zeros(self.full_res) dH = -1 * (-self.toa[...,None] * xap_dH - \ 2 * self.toa[...,None] * xap_H[...,None] * xbp_H[...,None] * xcp_dH + \ self.toa[...,None]**2 * xap_H[...,None]**2 * xcp_dH + \ xbp_dH + \ xbp_H[...,None]**2 * xcp_dH) / \ (self.toa[...,None] * xap_H[...,None] * xcp_H[...,None] - \ xbp_H[...,None] * xcp_H[...,None] + 1)**2 full_dJ = [ self.band_weights[...,None] * dH[:,:,i] * diff / (self.boa_unc ** 2) for i in range(2)] if is_full: dJ = np.nansum(np.array(full_dJ), axis=(1,)) dJ [np.isnan(dJ)] = 0 full_J[np.isnan(full_J)] = 0 #J_ = np.zeros((2,) + self.full_res) #J_[:, self.Hx, self.Hy] = dJ #subs1 = [np.array_split(sub, self.num_blocks_y, axis=2) for sub in np.array_split(J_, self.num_blocks_x, axis=1)] #J = np.zeros(self.full_res) #J[self.Hx, self.Hy] = full_J #subs2 = [np.array_split(sub, self.num_blocks_y, axis=1) for sub in np.array_split(J, self.num_blocks_x, axis=0)] #J_ = np.zeros((2, self.num_blocks_x, self.num_blocks_y)) #J = np.zeros(( self.num_blocks_x, self.num_blocks_y)) nx, ny = (np.ceil(np.array(self.full_res) / np.array([self.num_blocks_x, self.num_blocks_y])) \ * np.array([self.num_blocks_x, self.num_blocks_y])).astype(int) #end_x, end_y = np.array(self.full_res) - np.array([nx, ny]) x_size, y_size = int(nx / self.num_blocks_x), int(ny / self.num_blocks_y) J_ = np.zeros((2, nx, ny)) J = np.zeros(( nx, ny)) #J_[:], J[:] = np.nan, np.nan J_[:, self.Hx, self.Hy] = dJ J [ self.Hx, self.Hy] = full_J J_ = J_.reshape(2, self.num_blocks_x, x_size, self.num_blocks_y, y_size) J = J.reshape( self.num_blocks_x, x_size, self.num_blocks_y, y_size) J_ = np.sum(J_, axis=(2,4)) J = np.sum(J, axis=(1,3)) #for i in range(self.num_blocks_x): # for j in range(self.num_blocks_y): # J_[:, i,j] = np.nansum(subs1[i][j], axis=(1,2)) # J [ i,j] = np.nansum(subs2[i][j], axis=(0,1)) J_[:, ~self._coarse_mask] = 0 J [ ~self._coarse_mask] = 0 J_ = J_.reshape(2, -1) if do_unc: #comb_unc = np.nansum([self.band_weights[...,None] * (dH[:, :, i] ** 2) * (self.boa_unc ** -2) for i in range(2)], axis = 1) #comb_unc[comb_unc==0] = np.nan #self.obs_unc = np.zeros((2,) + self.full_res) #self.obs_unc[:] = np.nan #self.obs_unc[:, self.Hx, self.Hy] = comb_unc comb_unc = np.nansum([self.band_weights[...,None] * (dH[:, :, i] ** 2) * (self.boa_unc ** -2) for i in range(2)], axis = 1) comb_unc[comb_unc==0] = np.nan self.obs_unc = np.zeros((2, nx, ny)) self.obs_unc[:] = np.nan self.obs_unc[:, self.Hx, self.Hy] = comb_unc self.obs_unc = self.obs_unc.reshape(2, self.num_blocks_x, x_size, self.num_blocks_y, y_size) self.obs_unc = np.nanmean(self.obs_unc, axis=(2,4)) #subs = [np.array_split(sub, self.num_blocks_y, axis=2) for sub in np.array_split(self.obs_unc, 
self.num_blocks_x, axis=1)] #self.obs_unc = np.zeros((2, self.num_blocks_x, self.num_blocks_y)) #for i in range(self.num_blocks_x): # for j in range(self.num_blocks_y): # self.obs_unc[:, i,j] = np.nanmean(subs[i][j], axis=(1,2)) self.obs_unc[:,~self._coarse_mask] = np.nan return self.obs_unc else: J = np.nansum(np.array(full_J)) J_ = np.nansum(np.array(full_dJ), axis=(1, 2)) return J, J_
#data: ipixarr, ereo, lamb, others, radallcl, radallcs, fclall ipixarr = data["arr_0"] ereo = np.array(data["arr_1"]) radwvcs = np.array(data["arr_5"]) radwvcl = np.array(data["arr_4"]) if np.shape(radwvcs)[1] == np.shape(radwvcl)[1]: nlamb = np.shape(radwvcs)[1] else: sys.exit("INCONSISTENT DATA") fclall = np.zeros(pgeo.npix) fclall[ipixarr] = data["arr_6"] radcs = np.zeros(pgeo.npix) radcs[ipixarr] = np.nansum(radwvcs, axis=1) / nlamb * ereo radcl = np.zeros(pgeo.npix) radcl[ipixarr] = np.nansum(radwvcl, axis=1) / nlamb * ereo radcs[radcs < 0.0] = 0.0 ####force to zero if fcl < 0: fclall[fclall < 0.0] = 0.0 ############################ rad = radcs * (1.0 - fclall) + fclall * radcl lc.append(np.nansum(rad) * dOmega) lccs.append(np.nansum(radcs * (1.0 - fclall)) * dOmega) lccl.append(np.nansum(radcl * fclall) * dOmega) lccsf.append(np.nansum(radcs) * dOmega) fcl.append(np.median(data["arr_6"]))
# amplitude_kd_table = DataFrame( # amplitude_tables[1], index=ordered_labels, columns=ordered_labels) # # amplitude_kd_table.to_latex("amplitude_ks_table_pvals.tex") # amplitude_kd_table.to_csv("amplitude_ks_table_pvals.csv") if covering_frac: from pandas import DataFrame from astropy.io.fits import getdata cf = dict.fromkeys(widths.keys()) for i, name in enumerate(np.sort(widths.keys())): # Load the image in img = getdata(name + "/" + name + "_regrid_convolved.fits") model = getdata(name + "/" + name + "_filament_model.fits") cf[name] = np.nansum(model) / np.nansum(img) df = DataFrame(cf.values(), index=cf.keys(), columns=["Covering Fraction"]) df = df.sort() print(df) df.to_csv("covering_fracs.csv") if bran_len: new_branches = {} for key in branches.keys(): per_branch = [] for lis in branches[key]: # Split out parts str_list = lis[1:-1].split(',') float_list = [] for string in str_list:
def stack(stall, df, tstart, npts, stdup, stddown, nch_min): std_trac = np.empty(len(stall)) td = np.empty(len(stall)) """ Function to stack traces in a stream with different trace.id and different starttime but the same number of datapoints. Returns a trace having as starttime the earliest startime within the stream """ for itr, tr in enumerate(stall): std_trac[itr] = quality_cft(tr) avestd = np.nanmean(std_trac[0:]) avestdup = avestd * stdup avestddw = avestd * stddown for jtr, tr in enumerate(stall): if std_trac[jtr] >= avestdup or std_trac[jtr] <= avestddw: stall.remove(tr) print("removed Trace n Stream = ...", tr, std_trac[jtr], avestd) td[jtr] = 99.99 # print(td[jtr]) else: sta = tr.stats.station chan = tr.stats.channel net = tr.stats.network s = "%s.%s.%s" % (net, sta, chan) td[jtr] = float(d[s]) # print(td[jtr]) itr = len(stall) print("itr == ", itr) if itr >= nch_min: tdifmin = min(td) tdat = np.nansum([tr.data for tr in stall], axis=0) / itr sta = "STACK" cha = "BH" net = "XX" header = { "network": net, "station": sta, "channel": cha, "starttime": tstart, "sampling_rate": df, "npts": npts, } tt = Trace(data=tdat, header=header) else: tdifmin = None sta = "STACK" cha = "BH" net = "XX" header = { "network": net, "station": sta, "channel": cha, "starttime": tstart, "sampling_rate": df, "npts": npts, } tt = Trace(data=np.zeros(npts), header=header) return tt, tdifmin
def __entrofy(X, k, rng, w=None, q=None, pre_selects=None, quantile=0.01, alpha=0.5): """See entrofy() for documentation""" n_participants, n_attributes = X.shape X = np.array(X, dtype=np.float) if w is None: w = np.ones(n_attributes) if q is None: q = 0.5 * np.ones(n_attributes) assert 0 < k <= n_participants assert not np.any(w < 0) assert np.all(q >= 0.0) and np.all(q <= 1.0) assert len(w) == n_attributes assert len(q) == n_attributes if k == n_participants: return np.arange(n_participants) # Convert fractions to sums q = np.round(k * q) # Initialization y = np.zeros(n_participants, dtype=bool) if pre_selects is not None: y[pre_selects] = True # Where do we have missing data? Xn = np.isnan(X) while True: i = y.sum() if i >= k: break # Initialize the distribution vector # We suppress empty-slice warnings here: # even if y is non-empty, some column of X[y] may be all nans # in this case, the index set (y and not-nan) becomes empty. # It's easier to just ignore this warning here and recover below # than to prevent it by slicing out each column independently. with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) p = np.nansum(X[y], axis=0) p[np.isnan(p)] = 0.0 # Compute the candidate distributions p_new = p + X # Wherever X is nan, propagate the old p since we have no new information p_new[Xn] = (Xn * p)[Xn] # Compute marginal gain for each candidate delta = __objective(p_new, w, q, alpha=alpha) - __objective( p, w, q, alpha=alpha) # Knock out the points we've already taken delta[y] = -np.inf # Select the top score. Break near-ties randomly. delta_real = delta[np.isfinite(delta)] target_score = np.percentile(delta_real, 100 * (1.0 - quantile)) new_idx = rng.choice(np.flatnonzero(delta >= target_score)) y[new_idx] = True return ( __objective(np.nansum(X[y], axis=0), w, q, alpha=alpha), np.flatnonzero(y), )
height = period_nvdi.shape[0]
width = period_nvdi.shape[1]
bosque_nobosque = np.full(period_nvdi.shape, -1)

for y1 in range(0, height, slice_size):
    for x1 in range(0, width, slice_size):
        x2 = x1 + slice_size
        y2 = y1 + slice_size
        if (x2 > width):
            x2 = width
        if (y2 > height):
            y2 = height
        submatrix = period_nvdi[y1:y2, x1:x2]
        ok_pixels = np.count_nonzero(~np.isnan(submatrix))
        if ok_pixels == 0:
            bosque_nobosque[y1:y2, x1:x2] = nodata
        elif float(np.nansum(submatrix > ndvi_threshold)) / float(ok_pixels) >= vegetation_rate:
            bosque_nobosque[y1:y2, x1:x2] = 1
        else:
            bosque_nobosque[y1:y2, x1:x2] = 0

ncoords = []
xdims = []
xcords = {}
for x in nbar.coords:
    if (x != 'time'):
        ncoords.append((x, nbar.coords[x]))
        xdims.append(x)
        xcords[x] = nbar.coords[x]
variables = {
    "bosque_nobosque": xr.DataArray(bosque_nobosque,
def mad(dmad):
    # calculate daily median absolute deviation
    # (note: as written this sums |x - nanmedian| over non-NaN entries and
    #  divides by len(ccm), i.e. a mean absolute deviation about the median)
    ccm = dmad[dmad != 0]
    med_val = np.nanmedian(ccm)
    tstda = np.nansum(abs(ccm - med_val) / len(ccm))
    return tstda
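
# A small usage sketch for mad(); the values are illustrative only. Zeros
# are discarded before the deviation is computed and NaNs are ignored by the
# nan-aware median and sum (but still counted in len(ccm)).
import numpy as np

daily_cc = np.array([0.0, 0.8, 1.0, 1.2, np.nan])
print(mad(daily_cc))   # -> 0.1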
def SaveStereoImagesh5(Info2DS, FlightNumberStr, SizeThreshold): Path2DS = Info2DS[FlightNumberStr, 'Path2DS'] Path2DSsave = Info2DS[FlightNumberStr, 'Path2DSsave'] ThresholdDeltaDimaterY = Info2DS[FlightNumberStr, 'ThresholdDeltaDiameterY'] tmp = [ F for F in os.listdir(Path2DSsave) if F.endswith(".h5") and F.startswith('dNdD_L_Colocate_') ] files = [x.replace('dNdD_L_Colocate_', '') for x in tmp] for filena in files: print(filena) #Load colocation pbp statistics Data_h5 = h5py.File(Path2DSsave + 'Colocate_' + filena, 'r') ColocationParticleBufferTimeS_Ch0 = np.array( Data_h5['ColocationParticleBufferTimeS_Ch0']) ColocationParticleBufferTimeS_Ch1 = np.array( Data_h5['ColocationParticleBufferTimeS_Ch1']) ColocationImageID_Ch0 = np.array(Data_h5['ColocationImageID_Ch0']) ColocationImageID_Ch1 = np.array(Data_h5['ColocationImageID_Ch1']) ColocationEdgeCH0 = np.array(Data_h5['ColocationEdgeCh0']) ColocationEdgeCH1 = np.array(Data_h5['ColocationEdgeCh1']) ColocationMeanXYDiameter_Ch1 = np.array( Data_h5['ColocationMeanXYDiameter_Ch1']) ColocationMeanXYDiameter_Ch0 = np.array( Data_h5['ColocationMeanXYDiameter_Ch0']) ColocationSlicesY_Ch0 = np.array(Data_h5['ColocationSlicesY_Ch0']) ColocationSlicesY_Ch1 = np.array(Data_h5['ColocationSlicesY_Ch1']) ColocationSecondsCh0 = np.array(Data_h5['ColocationSecondsCh0']) ColocationSecondsCh1 = np.array(Data_h5['ColocationSecondsCh1']) Data_h5.close() #Load images Data_h5 = h5py.File(Path2DS + 'Export_' + filena, 'r') ImageTimes = np.array(Data_h5['ImageTimes'][:, 0]) ImageSlices = np.array(Data_h5['ImageTimes'][:, 1]) ImageID_Ch0 = np.array(Data_h5['ImageTimes'][:, 2]) ImageID_Ch1 = np.array(Data_h5['ImageTimes'][:, 2]) ImageSlices[ImageSlices < 0] = np.nan #Find start position of image within ImageData ImagePosition = np.cumsum(ImageSlices, axis=0) ImagePosition = np.append(0, ImagePosition) #Indexes to save images Stereo_Idxs = np.nonzero( ((ColocationMeanXYDiameter_Ch1 > SizeThreshold) | (ColocationMeanXYDiameter_Ch0 > SizeThreshold)) & ((ColocationEdgeCH0 == 0) | (ColocationEdgeCH1 == 0)) & (np.absolute(ColocationSlicesY_Ch0 - ColocationSlicesY_Ch1) < ThresholdDeltaDimaterY)) Stereo_Idxs = Stereo_Idxs[0] # Number of slices per stereo images OutputSlicesY_Ch0 = ColocationSlicesY_Ch0[ Stereo_Idxs] / 10 # needs to be in pixels not size OutputSlicesY_Ch1 = ColocationSlicesY_Ch1[Stereo_Idxs] / 10 #Position within output image array OutputImagePositionCh0 = np.cumsum(OutputSlicesY_Ch0, axis=0) OutputImagePositionCh0 = np.append(0, OutputImagePositionCh0) OutputImagePositionCh1 = np.cumsum(OutputSlicesY_Ch1, axis=0) OutputImagePositionCh1 = np.append(0, OutputImagePositionCh1) # Set up output array OutputImageCh0 = np.ones([128, int(np.nansum(OutputSlicesY_Ch0))], dtype=np.uint8) * 255 OutputImageCh1 = np.ones([128, int(np.nansum(OutputSlicesY_Ch1))], dtype=np.uint8) * 255 # select particles and put images in OutputImage for j, Idx in enumerate(Stereo_Idxs): # find each image in array #channel 0 Ch0i = np.nonzero( (ImageTimes == ColocationParticleBufferTimeS_Ch0[Idx]) & (ImageID_Ch0 == ColocationImageID_Ch0[Idx])) i = Ch0i[0] if (len(i) == 0): print('Missing =' + str(i)) # if can't find particle else: if (len(i) > 1): print('Multiple particles with same ID=' + str(i)) #repeat particle i = i[0] if (ImagePosition[i + 1] - ImagePosition[i] != (ColocationSlicesY_Ch0[Idx] / 10)): print('zero slice =' + str(i)) # 0 slice particle else: ImageCH0 = np.array( Data_h5['ImageData'] [:, int(ImagePosition[i]):int(ImagePosition[i + 1])]) #Add to output array 
OutputImageCh0[:, int(OutputImagePositionCh0[j]):int( OutputImagePositionCh0[j + 1])] = ImageCH0 #Channel 1 Ch1i = np.nonzero( (ImageTimes == ColocationParticleBufferTimeS_Ch1[Idx]) & (ImageID_Ch1 == ColocationImageID_Ch1[Idx])) i = Ch1i[0] if (len(i) == 0): print('Missing =' + str(i)) # if can't find particle else: if (len(i) > 1): print('Multiple particles with same ID=' + str(i)) #repeat particle i = i[0] if (ImagePosition[i + 1] - ImagePosition[i] != (ColocationSlicesY_Ch1[Idx] / 10)): print('zero slice =' + str(i)) # 0 slice particle else: ImageCH1 = np.array( Data_h5['ImageData'] [:, int(ImagePosition[i]):int(ImagePosition[i + 1])]) #Add to output array OutputImageCh1[:, int(OutputImagePositionCh1[j]):int( OutputImagePositionCh1[j + 1])] = ImageCH1 Data_h5.close() # # Save the images as .h5 h5f = h5py.File(Path2DSsave + 'StereoImages_' + filena, 'w') h5f.create_dataset('ImageCh0', data=OutputImageCh0) h5f.create_dataset('ImagePositionCh0', data=OutputImagePositionCh0) h5f.create_dataset('SecondsCh0', data=ColocationSecondsCh0[Stereo_Idxs]) h5f.create_dataset('SlicesY_Ch0', data=OutputSlicesY_Ch0) h5f.create_dataset('ImageCh1', data=OutputImageCh1) h5f.create_dataset('ImagePositionCh1', data=OutputImagePositionCh1) h5f.create_dataset('SecondsCh1', data=ColocationSecondsCh1[Stereo_Idxs]) h5f.create_dataset('SlicesY_Ch1', data=OutputSlicesY_Ch1) h5f.close()
def plot_histograms(data, sims, types, figname): cols = [ "x1", "c", "zHD", "FITPROB", "FITCHI2", "cERR", "x1ERR", "PKMJDERR", "HOST_LOGMASS", "SNRMAX1", "SNRMAX2", "SNRMAX3", #"SNRMAX_g", #"SNRMAX_r", #"SNRMAX_i", #"SNRMAX_z", ["zHD", "c"], ["zHD", "x1"], ["zHD", "HOST_LOGMASS"], "NDOF", #"chi2_g", #"chi2_r", #"chi2_i", #"chi2_z", "__MUDIFF", ] restricted = [ "FITCHI2", "SNRMAX1", "SNRMAX2", "SNRMAX3", "SNRMAX_g", "SNRMAX_r", "SNRMAX_i", "SNRMAX_z", "chi2_g", "chi2_r", "chi2_i", "chi2_z" ] logs = [ "SNRMAX1", "SNRMAX2", "SNRMAX3", "SNRMAX_g", "SNRMAX_r", "SNRMAX_i", "SNRMAX_z", "FITCHI2", "chi2_g", "chi2_r", "chi2_i", "chi2_z", "__MUDIFF" ] cs = [ "#1976D2", "#FB8C00", "#8BC34A", "#E53935", "#4FC3F7", "#43A047", "#F2D026", "#673AB7", "#FFB300", "#E91E63", "#F2D026" ] * 3 usecols = [] for c in cols: if isinstance(c, list): if (c[0] in data[0][0].columns) & (c[1] in data[0][0].columns): usecols.append(c) else: if c in data[0][0].columns: usecols.append(c) for c in restricted: for x in data + sims: if c in cols: x[0].loc[x[0][c] < -10, c] = -9 ncols = (len(cols) + 3) // 3 fig, axes = plt.subplots(3, ncols, figsize=(1 + 2.5 * ncols, 8), gridspec_kw={ "wspace": 0.13, "hspace": 0.4 }) for ax in axes.flatten(): ax.set_axis_off() for c, ax in zip(cols, axes.flatten()): ax.set_axis_on() u = 0.95 if c in restricted else 0.99 #HISTOGRAM if not isinstance(c, list): minv = min([x[0][c].quantile(0.01) for x in data + sims]) maxv = max([x[0][c].quantile(u) for x in data + sims]) bins = np.linspace(minv, maxv, 20) # Keep binning uniform. bc = 0.5 * (bins[1:] + bins[:-1]) for i, (d, n) in enumerate(data): hist, _ = np.histogram(d[c], bins=bins) err = np.sqrt(hist) area = (bins[1] - bins[0]) * hist.sum() delta = (bc[1] - bc[0]) / 20 ax.errorbar(bc + i * delta, hist / area, yerr=err / area, fmt="o", ms=2, elinewidth=0.75, label=n) lw = 1 if len(sims) < 3 else 0.5 for index, (s, n) in enumerate(sims): mask = np.isin(s["TYPE"], types) ia = s[mask] nonia = s[~mask] if len(np.unique(s[c])) == 1: continue hist, _ = np.histogram(s[c], bins=bins) area = (bins[1] - bins[0]) * hist.sum() ax.hist(s[c], bins=bins, histtype="step", weights=np.ones(s[c].shape) / area, label=n, linewidth=lw, color=cs[index]) if len(sims) == 1 and nonia.shape[0] > 10 and len(data) == 1: logging.info(f"Nonia shape is {nonia.shape}") ax.hist(ia[c], bins=bins, histtype="step", weights=np.ones(ia[c].shape) / area, linestyle="--", label=n + " Ia only", linewidth=1) ax.hist(nonia[c], bins=bins, histtype="step", weights=np.ones(nonia[c].shape) / area, linestyle=":", label=n + " CC only", linewidth=1) if "MUDIFF" in c: ax.set_xlabel("FAKE MUDIFF") else: ax.set_xlabel(c) if c in logs: ax.set_yscale("log") ax.tick_params(axis="y", which="both", labelsize=2) labels = ["" for item in ax.get_yticklabels()] ax.set_yticklabels(labels) # Add the reduced chi2 value if there are only one data and one sim if len(sims) < 3 and len(data) == 1: data_col = data[0][0][c] data_hist, _ = np.histogram(data_col, bins=bins) data_err = 1 / np.sqrt(data_hist) data_dist, _ = np.histogram(data_col, bins=bins, density=True) for i, (s, n) in enumerate(sims): sim_col = s[c] sim_hist, _ = np.histogram(sim_col, bins=bins) sim_err = 1 / np.sqrt(data_hist) sim_dist, _ = np.histogram(sim_col, bins=bins, density=True) dist_error = np.sqrt((data_dist * data_err)**2 + (sim_dist * sim_err)**2) dist_diff = data_dist - sim_dist chi2 = np.nansum(((dist_diff / dist_error)**2)) ndof = len(bc) red_chi2 = chi2 / ndof ax.text( 0.99, 0.99 - 0.1 * i, f"{chi2:0.1f}/{ndof:d}={red_chi2:0.1f}", 
horizontalalignment="right", verticalalignment="top", transform=ax.transAxes, color=cs[i], fontsize=8, ) ax.set_yticklabels([]) else: minv = min([x[0][c[0]].quantile(0.01) for x in data + sims]) maxv = max([x[0][c[0]].quantile(u) for x in data + sims]) bins = np.linspace(minv, maxv, 20) for i, (s, n) in enumerate(sims): sim_xcol = s[c[0]] sim_ycol = s[c[1]] bin_medians, bin_edges, binnumber = binned_statistic( sim_xcol, sim_ycol, statistic='median', bins=bins) bincenters = (bin_edges[:-1] + bin_edges[1:]) / 2. ax.plot(bincenters, bin_medians, label=n, alpha=.9, color=cs[i]) for i, (d, n) in enumerate(data): data_xcol = d[c[0]] data_ycol = d[c[1]] try: bin_medians, bin_edges, binnumber = binned_statistic( data_xcol, data_ycol, statistic='median', bins=bins) bin_stds, bin_edges, binnumber = binned_statistic( data_xcol, data_ycol, statistic='std', bins=bins) bin_counts, bin_edges, binnumber = binned_statistic( data_xcol, data_ycol, statistic='count', bins=bins) bincenters = (bin_edges[:-1] + bin_edges[1:]) / 2. ax.errorbar(bincenters, bin_medians, yerr=bin_stds / np.sqrt(bin_counts), fmt='o', label=n, alpha=.9) except: pass ax.set_xlabel(c[0]) ax.set_ylabel(c[1]) #ax.legend() handles, labels = ax.get_legend_handles_labels() bb = (fig.subplotpars.left, fig.subplotpars.top + 0.02, fig.subplotpars.right - fig.subplotpars.left, 0.1) #for ax in axes.flatten(): # ax.set_yticklabels([]) fig.legend(handles, labels, loc="upper center", ncol=4, mode="expand", frameon=False, bbox_to_anchor=bb, borderaxespad=0.0, bbox_transform=fig.transFigure) # plt.legend(bbox_to_anchor=(-3, 2.3, 4.0, 0.2), loc="lower left", mode="expand", ncol=3, frameon=False) # plt.tight_layout(rect=[0, 0, 0.75, 1]) fig.savefig(figname, bbox_inches="tight", dpi=600)
def replace_nans(array, max_iter, tol, kernel_size=2, method='disk'): """Replace NaN elements in an array using an iterative image inpainting algorithm. The algorithm is the following: 1) For each element in the input array, replace it by a weighted average of the neighbouring elements which are not NaN themselves. The weights depend on the method type. See Methods below. 2) Several iterations are needed if there are adjacent NaN elements. If this is the case, information is "spread" from the edges of the missing regions iteratively, until the variation is below a certain threshold. Methods: localmean - A square kernel where all elements have the same value, weights are equal to n/( (2*kernel_size+1)**2 -1 ), where n is the number of non-NaN elements. disk - A circular kernel where all elements have the same value, kernel is calculated by:: if ((S-i)**2 + (S-j)**2)**0.5 <= S: kernel[i,j] = 1.0 else: kernel[i,j] = 0.0 where S is the kernel radius. distance - A circular inverse distance kernel where elements are weighted proportional to their distance away from the center of the kernel, elements farther away have less weight. Elements outside the specified radius are set to 0.0 as in 'disk', the remaining of the weights are calculated as:: maxDist = ((S)**2 + (S)**2)**0.5 kernel[i,j] = -1*(((S-i)**2 + (S-j)**2)**0.5 - maxDist) where S is the kernel radius. Parameters ---------- array : 2d or 3d np.ndarray an array containing NaN elements that have to be replaced max_iter : int the number of iterations tol : float On each iteration check if the mean square difference between values of replaced elements is below a certain tolerance `tol` kernel_size : int the size of the kernel, default is 1 method : str the method used to replace invalid values. Valid options are `localmean`, `disk`, and `distance`. Returns ------- filled : 2d or 3d np.ndarray a copy of the input array, where NaN elements have been replaced. """ kernel_size = int(kernel_size) filled = array.copy() n_dim = len(array.shape) # generating the kernel kernel = np.zeros([2 * kernel_size + 1] * len(array.shape), dtype=int) if method == 'localmean': kernel += 1 elif method == 'disk': dist, dist_inv = get_dist(kernel, kernel_size) kernel[dist <= kernel_size] = 1 elif method == 'distance': dist, dist_inv = get_dist(kernel, kernel_size) kernel[dist <= kernel_size] = dist_inv[dist <= kernel_size] else: raise ValueError( 'method not valid. Should be one of `localmean`, `disk` or `distance`.' 
) # list of kernel array indices # kernel_indices = np.indices(kernel.shape) # kernel_indices = np.reshape(kernel_indices, (n_dim, (2 * kernel_size + 1) ** n_dim), order="C").T # indices where array is NaN nan_indices = np.array(np.nonzero(np.isnan(array))).T.astype(int) # number of NaN elements n_nans = len(nan_indices) # arrays which contain replaced values to check for convergence replaced_new = np.zeros(n_nans) replaced_old = np.zeros(n_nans) # make several passes # until we reach convergence for it in range(max_iter): # note: identifying new nan indices and looping other the new indices would give slightly different result # for each NaN element for k in range(n_nans): ind = nan_indices[ k] #2 or 3 indices indicating the position of a nan element # init to 0.0 replaced_new[k] = 0.0 # generating a list of indices of the convolution window in the array slice_indices = np.array( np.meshgrid( * [range(i - kernel_size, i + kernel_size + 1) for i in ind])) # identifying all indices strictly inside the image edges: in_mask = np.array([ np.logical_and(slice_indices[i] < array.shape[i], slice_indices[i] >= 0) for i in range(n_dim) ]) # logical and over x,y (and z) indices in_mask = np.prod(in_mask, axis=0).astype(bool) # extract window from array win = filled[tuple(slice_indices[:, in_mask])] # selecting the same points from the kernel kernel_in = kernel[in_mask] # sum of elements of the kernel that are not nan in the window non_nan = np.sum(kernel_in[~np.isnan(win)]) if non_nan > 0: # convolution with the kernel replaced_new[k] = np.nansum(win * kernel_in) / non_nan else: # don't do anything if there is only nans around replaced_new[k] = np.nan # bulk replace all new values in array filled[tuple(nan_indices.T)] = replaced_new # check if replaced elements are below a certain tolerance if np.mean((replaced_new - replaced_old)**2) < tol: break else: replaced_old = replaced_new return filled
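# A minimal usage sketch for replace_nans above, assuming its definition is in
# scope: a small 2-D field with isolated NaN holes is inpainted with the
# 'localmean' kernel (which does not require the get_dist helper). The array
# values and parameters are illustrative only.
import numpy as np

field = np.arange(36, dtype=float).reshape(6, 6)
field[2, 3] = np.nan
field[4, 1] = np.nan

filled = replace_nans(field, max_iter=10, tol=1e-6, kernel_size=1,
                      method='localmean')
assert not np.any(np.isnan(filled))   # isolated holes are filled by neighbours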
FSTATE = { fn: ", ".join([states[stid] for stid in np.unique(rstate[fni == rfields])]) for fni, fn in enumerate(field_names) } if not args.compute_exposure: for f in FDB: vermeerkat.log.info("\t '{0:s}' index {1:s} marked by observer as " "'{2:s}'".format(f, FDB[f], FSTATE[f])) else: vermeerkat.log.info("Computing exposure... stand by") with tbl(os.path.join(MSDIR, ZEROGEN_DATA), ack=False) as t: flg = t.getcol("FLAG") exp = t.getcol("EXPOSURE") exp[flg] = np.nan FEXP = { fn: np.nansum(exp[rfields == fni]) for fni, fn in enumerate(field_names) } vermeerkat.log.info("The following fields are available:") for f in FDB: vermeerkat.log.info( "\t '{0:s}' index {1:s} marked by observer as " "'{2:s}' with unflagged exposure of {3:02.0f}:{4:02.0f}:{5:02.2f}". format(f, FDB[f], FSTATE[f], FEXP[f] // 3600, FEXP[f] % 3600 // 60, FEXP[f] % 3600 % 60)) vermeerkat.log.info("---End of listr---")
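# A simplified, self-contained version of the per-field exposure sum above:
# flagged rows are blanked to NaN so np.nansum only accumulates unflagged
# integration time. The original reads FLAG/EXPOSURE from a Measurement Set;
# the field names and per-row values below are made up.
import numpy as np

field_names = ['J0408-6545', 'DEEP_2']        # hypothetical field names
rfields = np.array([0, 0, 1, 1, 1])           # FIELD_ID per row
exp = np.array([8.0, 8.0, 8.0, 8.0, 8.0])     # EXPOSURE per row [s]
flag = np.array([False, True, False, False, True])

exp[flag] = np.nan
FEXP = {fn: np.nansum(exp[rfields == fni])
        for fni, fn in enumerate(field_names)}

for fn, sec in FEXP.items():
    print("%s unflagged exposure %02.0f:%02.0f:%05.2f"
          % (fn, sec // 3600, sec % 3600 // 60, sec % 3600 % 60))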
def steric(Tb, Sb, dT, dS, deta, eta): ogrid = ModelGrid(fname='../wrkdir/grid_spec.nc') p = np.ones(np.shape(Sb)) h = np.ones(np.shape(Sb)) for index in range(0, np.shape(p)[0]): p[index, :, :] = ogrid.zt[index] if (index == 0): h[index, :, :] = ogrid.zb[index] #-eta else: h[index, :, :] = abs(ogrid.zb[index] - ogrid.zb[index - 1]) rho_b = eos.density(Sb, Tb, p) rho_a = eos.density(Sb + dS, Tb + dT, p) rho_a_halo = eos.density(Sb + dS, Tb, p) rho_a_thermo = eos.density(Sb, Tb + dT, p) dsteric = -np.nansum(h * (rho_a - rho_b) / rho_b, axis=0) dhalosteric = -np.nansum(h * (rho_a_halo - rho_b) / rho_b, axis=0) dthermosteric = -np.nansum(h * (rho_a_thermo - rho_b) / rho_b, axis=0) return dsteric, dhalosteric, dthermosteric ''' print np.shape(dsteric), np.shape(Sb) dsteric[dsteric==0.0]=np.nan deta[deta==0.0]=np.nan print 'deta-dsteric=',np.nansum(np.abs(deta-dsteric)) fig = plt.figure(num=1, figsize=(19,12), facecolor='w') #grid = AxesGrid(fig, 111, nrows_ncols = (2, 3), axes_pad = 0.5, # cbar_location="bottom", # cbar_mode="each", # cbar_size="7%", # cbar_pad="2%") vmax=0.1 vmin=-vmax #plt.sca(grid[0]) plt.subplot(231) ch=plt2d(ogrid.x, ogrid.y, dsteric, minval=vmin, maxval=vmax, colmap=cm.jet) #grid.cbar_axes[0].colorbar(ch) plt.title('Steric height infered increment [m]') #plt.sca(grid[1]) plt.subplot(232) incr_s=np.flipud(dhalosteric) incr_s[incr_s==0.0]=np.nan ch=plt2d(ogrid.x, ogrid.y, dhalosteric, minval=vmin, maxval=vmax, colmap=cm.jet) #grid.cbar_axes[1].colorbar(ch) plt.title('Halo Steric height infered increment [m]') #plt.sca(grid[2]) plt.subplot(233) incr_t=np.flipud(dthermosteric) incr_t[incr_t==0.0]=np.nan ch=plt2d(ogrid.x, ogrid.y, dthermosteric, minval=vmin, maxval=vmax, colmap=cm.jet) #grid.cbar_axes[2].colorbar(ch) plt.title('Thermo Steric height infered increment [m]') #plt.sca(grid[3]) plt.subplot(234) ch=plt2d(ogrid.x, ogrid.y, deta, minval=vmin, maxval=vmax, colmap=cm.jet) #grid.cbar_axes[3].colorbar(ch) plt.title('SSH increment [m]') vmin=-0.1 vmax=0.1 #plt.sca(grid[4]) plt.subplot(235) err=deta-dsteric #err[np.abs(err)<0.05]=np.nan #err=np.abs(deta/dsteric) ch=plt2d(ogrid.x, ogrid.y, err, minval=vmin, maxval=vmax, colmap=cm.bwr) #grid.cbar_axes[4].colorbar(ch) plt.title('SSH - Steric [m]') #plt.sca(grid[5]) plt.subplot(236) err=deta-dthermosteric #err=np.abs(deta/dthermosteric) #err[np.abs(err)<0.05]=np.nan ch=plt2d(ogrid.x, ogrid.y, err, minval=vmin, maxval=vmax, colmap=cm.bwr) #grid.cbar_axes[5].colorbar(ch) plt.title('SSH - Thermosteric [m]') ''' plt.subplot(235) incr_t = np.flipud(dthermosteric) incr_t[incr_t == 0.0] = np.nan plt.imshow(incr_t, vmin=vmin, vmax=vmax) #plt.imshow(np.flipud(dthermosteric),vmin=vmin,vmax=vmax) plt.colorbar() plt.subplot(231) plt.imshow(np.flipud(deta), vmin=vmin, vmax=vmax) plt.title('SSH increment [m]') plt.colorbar() plt.subplot(233) err = np.flipud(deta - dsteric) err[np.abs(err) < 0.02] = np.nan plt.imshow(err, vmin=vmin, vmax=vmax) #plt.imshow(np.flipud(deta-dsteric),vmin=vmin,vmax=vmax) plt.title('SSH - Steric height [m]') plt.colorbar() '''
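# A reduced sketch of the steric-height integral used in steric() above, with a
# toy linear equation of state standing in for eos.density (an assumption made
# only so the example runs); layer thicknesses and increments are illustrative.
import numpy as np

def toy_density(S, T, p, rho0=1025.0, alpha=2e-4, beta=8e-4):
    """Very rough linear EOS; p is accepted only to mirror the eos.density call."""
    return rho0 * (1.0 - alpha * (T - 10.0) + beta * (S - 35.0))

nz = 5
h = np.full(nz, 10.0)                 # layer thicknesses [m]
p = np.cumsum(h) - 0.5 * h            # mid-layer depth as a pressure proxy
Tb = np.full(nz, 10.0)
Sb = np.full(nz, 35.0)
dT = np.full(nz, 0.5)                 # warming increment
dS = np.zeros(nz)

rho_b = toy_density(Sb, Tb, p)
rho_a = toy_density(Sb + dS, Tb + dT, p)

# Column-integrated steric height change [m]; np.nansum tolerates NaN (land) layers.
dsteric = -np.nansum(h * (rho_a - rho_b) / rho_b, axis=0)
print(dsteric)   # positive for warming: expansion raises sea level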
def _fill_plot(self, xaxis, yaxis, zi, ax, cax, title, yticks, vmin=None, vmax=None): xi = xaxis[:] yi = yaxis[:] X, Y, Z = pcolor_helper(xi, yi, zi) if vmax is None: vmax = np.nanmax(Z) if vmin is None: vmin = np.nanmin(Z) if self.data_signed: extent = max(vmax, -vmin) vmin = -extent vmax = extent # pcolor mappable = ax.pcolor(X, Y, Z, cmap=self.cmap, vmin=vmin, vmax=vmax) ax.set_xlim(xi.min(), xi.max()) ax.set_ylim(yi.min(), yi.max()) ax.grid() if xaxis.units_kind == yaxis.units_kind: diagonal_line(xi, yi, ax=ax) plt.setp(ax.get_yticklabels(), visible=yticks) # x sideplot sp = add_sideplot(ax, 'x') b = np.nansum(zi, axis=0) * len(yaxis[:]) b[b == 0] = np.nan b /= np.nanmax(b) sp.plot(xi, b, lw=2, c='b') sp.set_xlim([xi.min(), xi.max()]) sp.set_ylim(self.sideplot_limits) for data, channel_index, c in self.sideplot_dictionary[xaxis.name]: data.convert(xaxis.units, verbose=False) sp_xi = data.axes[0][:] sp_zi = data.channels[channel_index][:] sp_zi[sp_xi < xi.min()] = 0 sp_zi[sp_xi > xi.max()] = 0 sp_zi /= np.nanmax(sp_zi) sp.plot(sp_xi, sp_zi, lw=2, c=c) sp.grid() if self.data_signed: sp.axhline(0, c='k', lw=1) sp.set_title(title, fontsize=18) sp0 = sp # y sideplot sp = add_sideplot(ax, 'y') b = np.nansum(zi, axis=1) * len(xaxis[:]) b[b == 0] = np.nan b /= np.nanmax(b) sp.plot(b, yi, lw=2, c='b') sp.set_xlim(self.sideplot_limits) sp.set_ylim([yi.min(), yi.max()]) for data, channel_index, c in self.sideplot_dictionary[yaxis.name]: data.convert(xaxis.units, verbose=False) sp_xi = data.axes[0][:] sp_zi = data.channels[channel_index][:] sp_zi[sp_xi < xi.min()] = 0 sp_zi[sp_xi > xi.max()] = 0 sp_zi /= np.nanmax(sp_zi) sp.plot(sp_zi, sp_xi, lw=2, c=c) sp.grid() if self.data_signed: sp.axvline(0, c='k', lw=1) sp1 = sp # colorbar plt.colorbar(mappable=mappable, cax=cax) return [sp0, sp1]
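# A stand-alone sketch of the side-plot reduction used in _fill_plot above: the
# 2-D array is collapsed onto each axis with np.nansum, all-zero traces are
# blanked, and the result is normalised to its maximum before plotting. The
# input array here is synthetic.
import numpy as np

zi = np.random.rand(64, 48)
zi[10:14, :] = np.nan                      # e.g. a masked stripe

x_trace = np.nansum(zi, axis=0)            # collapse rows -> one value per column
x_trace[x_trace == 0] = np.nan             # hide empty columns
x_trace /= np.nanmax(x_trace)              # normalise for the x side plot

y_trace = np.nansum(zi, axis=1)            # collapse columns -> one value per row
y_trace[y_trace == 0] = np.nan
y_trace /= np.nanmax(y_trace)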
# Add all trends (mostly negative) to the initial ice areas to find the final # ice areas. # Use the last month that corresponds to the initial month. # For example, compare June 2001 with June 2018, do not compare # June 2001 with August 2018. # On second thought, this doesn't matter. A larger area decrease will # mean a larger melting flux increase. A smaller area decrease will # mean a smaller melting flux increase, so comparing June with August # should be okay. # Units are km2 ice_areas_final = ice_areas_initial + ice_data['grid_ice_area_trend'] water_areas_final = ice_areas_initial - ice_areas_final # Take the sums of both the initial ice areas and final ice areas. # Units are converted to m2 sum_init_ice = np.nansum(ice_areas_initial) * 1e6 sum_final_ice = np.nansum(ice_areas_final) * 1e6 sum_final_water = np.nansum(water_areas_final) * 1e6 # Find the fluxes associated with each ice area decrease in each box # In this case the trends act as a 'darea' value # Units are ((W/m2)/km2)*km2 = W/m2 fluxes = dflux_darea * ice_data['grid_ice_area_trend'] # Multiply the fluxes by the base area to find the power input in each # box. Convert the ice areas to square meters # Units are (W/m2) * m2 = W power_input_per_box = fluxes * (ice_areas_initial) # Taking the sum of the power input per box yields the total power input into # the whole base area.
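# A compact numeric sketch of the bookkeeping described above, with made-up
# numbers: the trend is added to the initial ice area, the per-box flux change
# follows from dflux_darea, and the power input is flux times box area (here
# converted to m2). The final np.nansum total is the step the last comment
# describes and is added only as an illustration.
import numpy as np

ice_areas_initial = np.array([100.0, 250.0, np.nan])        # km2 per box
grid_ice_area_trend = np.array([-10.0, -25.0, np.nan])      # km2 per box
dflux_darea = np.array([-0.002, -0.001, np.nan])            # (W/m2) per km2

ice_areas_final = ice_areas_initial + grid_ice_area_trend   # km2
fluxes = dflux_darea * grid_ice_area_trend                  # W/m2 per box
power_input_per_box = fluxes * (ice_areas_initial * 1e6)    # W

total_power_input = np.nansum(power_input_per_box)          # W, NaN boxes skipped
print(total_power_input)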
rstate = np.random.RandomState(self.random_state) from Orange.preprocess._relieff import rrelieff weights = np.asarray( rrelieff(data.X, data.Y, self.n_iterations, self.k_nearest, np.array([a.is_discrete for a in data.domain.attributes]), rstate)) if feature: return weights[0] return weights if __name__ == '__main__': from Orange.data import Table X = np.random.random((500, 20)) X[np.random.random(X.shape) > .95] = np.nan y_cls = np.zeros(X.shape[0]) y_cls[(X[:, 0] > .5) ^ (X[:, 1] > .6)] = 1 y_reg = np.nansum(X[:, 0:3], 1) for relief, y in ((ReliefF(), y_cls), (RReliefF(), y_reg)): data = Table.from_numpy(None, X, y) weights = relief.score_data(data, False) print(relief.__class__.__name__) print('Best =', weights.argsort()[::-1]) print('Weights =', weights[weights.argsort()[::-1]]) X *= 10 data = Table.from_numpy(None, X, y_cls) weights = FCBF().score_data(data, False) print('FCBF') print('Best =', weights.argsort()[::-1]) print('Weights =', weights[weights.argsort()[::-1]])
def run(self, layers): """Indonesian Earthquake Fatality Model Input layers: List of layers expected to contain H: Raster layer of MMI ground shaking P: Raster layer of population density """ # Establish model coefficients x = self.parameters['x'] y = self.parameters['y'] # Define percentages of people being displaced at each mmi level displacement_rate = self.parameters['displacement_rate'] # Tolerance for transparency tolerance = self.parameters['tolerance'] # Extract input layers intensity = get_hazard_layer(layers) population = get_exposure_layer(layers) question = get_question(intensity.get_name(), population.get_name(), self) # Extract data grids H = intensity.get_data() # Ground Shaking P = population.get_data(scaling=True) # Population Density # Calculate population affected by each MMI level # FIXME (Ole): this range is 2-9. Should 10 be included? mmi_range = range(2, 10) number_of_exposed = {} number_of_displaced = {} number_of_fatalities = {} # Calculate fatality rates for observed Intensity values (H # based on ITB power model R = numpy.zeros(H.shape) for mmi in mmi_range: # Identify cells where MMI is in class i mask = (H > mmi - 0.5) * (H <= mmi + 0.5) # Count population affected by this shake level I = numpy.where(mask, P, 0) # Calculate expected number of fatalities per level fatality_rate = numpy.power(10.0, x * mmi - y) F = fatality_rate * I # Calculate expected number of displaced people per level try: D = displacement_rate[mmi] * I except KeyError, e: msg = 'mmi = %i, I = %s, Error msg: %s' % (mmi, str(I), str(e)) raise InaSAFEError(msg) # Adjust displaced people to disregard fatalities. # Set to zero if there are more fatalities than displaced. D = numpy.where(D > F, D - F, 0) # Sum up numbers for map R += D # Displaced # Generate text with result for this study # This is what is used in the real time system exposure table number_of_exposed[mmi] = numpy.nansum(I.flat) number_of_displaced[mmi] = numpy.nansum(D.flat) number_of_fatalities[mmi] = numpy.nansum(F.flat)
def phenomena(vis, ir, dep, z, zb, rth1, rth4, pblth, snrth): NX = ir.shape[-1] NZ = ir.shape[0] DZ = z[1] - z[0] DZinv = 1.0 / DZ iz_120 = int(round(0.12 * DZinv)) - 1 iz_240 = int(round(0.24 * DZinv)) - 1 iz_300 = int(round(0.3 * DZinv)) - 1 iz_450 = int(round(0.45 * DZinv)) - 1 iz_600 = int(round(0.6 * DZinv)) - 1 iz_1500 = int(round(1.5 * DZinv)) - 1 iz_3000 = int(round(3.0 * DZinv)) - 1 iz_9km = int(round(9.0 * DZinv)) - 1 iz_15km = int(round(15.0 * DZinv)) - 1 iz_16km = int(round(16.0 * DZinv)) - 1 iz_18km = int(round(18.0 * DZinv)) - 1 rf = np.zeros(NX) pbl = np.full(NX, np.nan) invtop = np.full(NX, np.nan) nlg = np.full(NX, 9.0) for ix in range(NX): profile_ir = ir[:, ix] profile_vis = vis[:, ix] profile_dep = dep[:, ix] if np.all(np.isnan(profile_ir)): continue diff = (profile_ir[1:] - profile_ir[:-1]) * DZinv with np.errstate(divide='ignore', invalid='ignore'): cr = np.where(profile_vis == 0, np.nan, profile_ir / profile_vis) ### std_tail = np.nanstd(profile_vis[iz_15km:iz_16km]) * snrth for iz in range(iz_450, iz_9km): if profile_vis[iz] < std_tail: nlg[ix] = z[iz] break ### Inversion must start > 600m zmax = np.nanmin([zb[ix], 9]) invtop[ix] = np.nanmin( [zb[ix] - DZ * 2**(zmax / 1.5) - 0.15, nlg[ix], 9.0]) ### Rain zmax = np.nanmin([zb[ix], 3]) iz_max = int(round(zmax * DZinv)) - 1 with np.errstate(invalid='ignore'): count = np.nansum(cr[iz_240:iz_max] >= 1.1) zmin = 0.24 if count * DZ >= (zmax - zmin) * rth1: rf[ix] = 1 continue ### Fog? if nlg[ix] <= 0.6: rf[ix] = 2 continue ### Snow if np.nanmean(profile_vis[iz_120:iz_240]) >= 5E5 and np.nanmean( profile_dep[iz_120:iz_240]) >= 0.2: rf[ix] = 3 continue zmax = np.nanmax([zb[ix], 1.5]) iz_max = int(round(zmax * DZinv)) - 1 if np.nanmin(diff[iz_240:iz_max]) < rth4 / 3E-2: rf[ix] = 4 continue if np.nanmax(profile_vis[iz_240:iz_max]) >= 5E6 and np.nanmax( profile_dep[iz_240:iz_max]) >= 0.3: rf[ix] = 5 continue ### PBL zmax = np.nanmin([zb[ix] - 0.15, 4.5]) iz_top = int(round(zmax * DZinv)) - 1 with np.errstate(invalid='ignore'): for iz in range(iz_600, iz_top): if np.nansum(profile_vis[iz-iz_300:iz])/np.nansum(profile_vis[iz+1:iz+iz_300+1]) >= pblth and \ max(profile_vis[iz_300:iz]) < min(profile_vis[iz_300:iz]) * 2.0: pbl[ix] = z[iz] continue return rf, pbl, invtop
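# The rain test above relies on np.nansum over a boolean comparison: NaN range
# gates compare False, so the sum simply counts valid gates with colour ratio
# >= 1.1. A tiny illustration with made-up profile values:
import numpy as np

cr = np.array([0.8, 1.3, np.nan, 1.2, 0.9, np.nan, 1.5])
DZ = 0.03                                  # km per range gate (illustrative)

with np.errstate(invalid='ignore'):
    count = np.nansum(cr >= 1.1)           # -> 3 gates above threshold
rain_depth_fraction = count * DZ           # km of column flagged as rain-like
print(count, rain_depth_fraction)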
def main(night_name=None, fpfile=None, hcfiles=None): """ cal_wave_spirou.py main function, if night_name and files are None uses arguments from run time i.e.: cal_wave_spirou.py [night_directory] [fpfile] [hcfiles] :param night_name: string or None, the folder within data reduced directory containing files (also reduced directory) i.e. /data/reduced/20170710 would be "20170710" but /data/reduced/AT5/20180409 is "AT5/20180409" :param fpfile: string, or None, the FP file to use for arg_file_names and fitsfilename (if None assumes arg_file_names was set from run time) :param hcfiles: string, list or None, the list of HC files to use for arg_file_names and fitsfilename (if None assumes arg_file_names was set from run time) :return ll: dictionary, containing all the local variables defined in main """ # ---------------------------------------------------------------------- # Set up # ---------------------------------------------------------------------- # get parameters from config files/run time args/load paths + calibdb p = spirouStartup.Begin(recipe=__NAME__) if hcfiles is None or fpfile is None: names, types = ['fpfile', 'hcfiles'], [str, str] customargs = spirouStartup.GetCustomFromRuntime(p, [0, 1], types, names, last_multi=True) else: customargs = dict(hcfiles=hcfiles, fpfile=fpfile) # get parameters from configuration files and run time arguments p = spirouStartup.LoadArguments(p, night_name, customargs=customargs, mainfitsdir='reduced', mainfitsfile='hcfiles') # ---------------------------------------------------------------------- # Construct reference filename and get fiber type # ---------------------------------------------------------------------- p, fpfitsfilename = spirouStartup.SingleFileSetup(p, filename=p['FPFILE']) fiber1 = str(p['FIBER']) p, hcfilenames = spirouStartup.MultiFileSetup(p, files=p['HCFILES']) fiber2 = str(p['FIBER']) # set the hcfilename to the first hcfilenames hcfitsfilename = hcfilenames[0] # ---------------------------------------------------------------------- # Once we have checked the e2dsfile we can load calibDB # ---------------------------------------------------------------------- # as we have custom arguments need to load the calibration database p = spirouStartup.LoadCalibDB(p) # ---------------------------------------------------------------------- # Have to check that the fibers match # ---------------------------------------------------------------------- if fiber1 == fiber2: p['FIBER'] = fiber1 fsource = __NAME__ + '/main() & spirouStartup.GetFiberType()' p.set_source('FIBER', fsource) else: emsg = 'Fiber not matching for {0} and {1}, should be the same' eargs = [hcfitsfilename, fpfitsfilename] WLOG(p, 'error', emsg.format(*eargs)) # set the fiber type p['FIB_TYP'] = [p['FIBER']] p.set_source('FIB_TYP', __NAME__ + '/main()') # ---------------------------------------------------------------------- # Read FP and HC files # ---------------------------------------------------------------------- # read and combine all HC files except the first (fpfitsfilename) rargs = [p, 'add', hcfitsfilename, hcfilenames[1:]] p, hcdata, hchdr = spirouImage.ReadImageAndCombine(*rargs) # read first file (fpfitsfilename) fpdata, fphdr, _, _ = spirouImage.ReadImage(p, fpfitsfilename) # add data and hdr to loc loc = ParamDict() loc['HCDATA'], loc['HCHDR'], loc['HCCDR'] = hcdata, hchdr, hchdr.comments loc['FPDATA'], loc['FPHDR'], loc['FPCDR'] = fpdata, fphdr, fphdr.comments # set the source sources = ['HCDATA', 'HCHDR', 'HCCDR'] loc.set_sources(sources, 
'spirouImage.ReadImageAndCombine()') sources = ['FPDATA', 'FPHDR', 'FPCDR'] loc.set_sources(sources, 'spirouImage.ReadImage()') # ---------------------------------------------------------------------- # Get basic image properties for reference file # ---------------------------------------------------------------------- # get sig det value p = spirouImage.GetSigdet(p, hchdr, name='sigdet') # get exposure time p = spirouImage.GetExpTime(p, hchdr, name='exptime') # get gain p = spirouImage.GetGain(p, hchdr, name='gain') # get acquisition time p = spirouImage.GetAcqTime(p, hchdr, name='acqtime', kind='julian') bjdref = p['ACQTIME'] # set sigdet and conad keywords (sigdet is changed later) p['KW_CCD_SIGDET'][1] = p['SIGDET'] p['KW_CCD_CONAD'][1] = p['GAIN'] # get lamp parameters p = spirouWAVE2.get_lamp_parameters(p, hchdr) # get number of orders # we always get fibre A number because AB is doubled in constants file loc['NBO'] = p['QC_LOC_NBO_FPALL']['A'] loc.set_source('NBO', __NAME__ + '.main()') # get number of pixels in x from hcdata size loc['NBPIX'] = loc['HCDATA'].shape[1] loc.set_source('NBPIX', __NAME__ + '.main()') # ---------------------------------------------------------------------- # Read blaze # ---------------------------------------------------------------------- # get tilts p, loc['BLAZE'] = spirouImage.ReadBlazeFile(p, hchdr) loc.set_source('BLAZE', __NAME__ + '/main() + /spirouImage.ReadBlazeFile') # make copy of blaze (as it's overwritten later in CCF part) # TODO is this needed? More sensible to make and set copy in CCF? loc['BLAZE2'] = np.copy(loc['BLAZE']) # ---------------------------------------------------------------------- # Read wave solution # ---------------------------------------------------------------------- # wavelength file; we will use the polynomial terms in its header, # NOT the pixel values that would need to be interpolated # set source of wave file wsource = __NAME__ + '/main() + /spirouImage.GetWaveSolution' # Force A and B to AB solution if p['FIBER'] in ['A', 'B']: wave_fiber = 'AB' else: wave_fiber = p['FIBER'] # get wave image wout = spirouImage.GetWaveSolution(p, hdr=hchdr, return_wavemap=True, return_filename=True, fiber=wave_fiber) loc['WAVEPARAMS'], loc['WAVE_INIT'], loc['WAVEFILE'], loc['WSOURCE'] = wout loc.set_sources(['WAVE_INIT', 'WAVEFILE', 'WAVEPARAMS', 'WSOURCE'], wsource) poly_wave_sol = loc['WAVEPARAMS'] # ---------------------------------------------------------------------- # Check that wave parameters are consistent with "ic_ll_degr_fit" # ---------------------------------------------------------------------- loc = spirouImage.CheckWaveSolConsistency(p, loc) # ---------------------------------------------------------------------- # HC wavelength solution # ---------------------------------------------------------------------- # log that we are running the HC part and the mode wmsg = 'Now running the HC solution, mode = {0}' WLOG(p, 'info', wmsg.format(p['WAVE_MODE_HC'])) # get the solution loc = spirouWAVE2.do_hc_wavesol(p, loc) # ---------------------------------------------------------------------- # Quality control - HC solution # ---------------------------------------------------------------------- # set passed variable and fail message list passed, fail_msg = True, [] qc_values, qc_names, qc_logic, qc_pass = [], [], [], [] # ---------------------------------------------------------------------- # quality control on sigma clip (sig1 > qc_hc_wave_sigma_max if loc['SIG1'] > p['QC_HC_WAVE_SIGMA_MAX']: fmsg = 'Sigma too high 
({0:.5f} > {1:.5f})' fail_msg.append(fmsg.format(loc['SIG1'], p['QC_HC_WAVE_SIGMA_MAX'])) passed = False qc_pass.append(0) else: qc_pass.append(1) # add to qc header lists qc_values.append(loc['SIG1']) qc_names.append('SIG1 HC') qc_logic.append('SIG1 > {0:.2f}'.format(p['QC_HC_WAVE_SIGMA_MAX'])) # ---------------------------------------------------------------------- # check the difference between consecutive orders is always positive # get the differences wave_diff = loc['WAVE_MAP2'][1:] - loc['WAVE_MAP2'][:-1] if np.min(wave_diff) < 0: fmsg = 'Negative wavelength difference between orders' fail_msg.append(fmsg) passed = False qc_pass.append(0) else: qc_pass.append(1) # add to qc header lists qc_values.append(np.min(wave_diff)) qc_names.append('MIN WAVE DIFF HC') qc_logic.append('MIN WAVE DIFF < 0') # ---------------------------------------------------------------------- # check the difference between consecutive pixels along an order is # always positive # loop through the orders ord_check = np.zeros((loc['NBO']), dtype=bool) for order in range(loc['NBO']): oc = np.all(loc['WAVE_MAP2'][order, 1:] > loc['WAVE_MAP2'][order, :-1]) ord_check[order] = oc # TODO: Melissa Why is this here???? # ord_check[5] = False if np.all(ord_check): qc_pass.append(1) qc_values.append('None') else: fmsg = 'Negative wavelength difference along an order' fail_msg.append(fmsg) passed = False qc_pass.append(0) qc_values.append(np.ndarray.tolist(np.where(~ord_check)[0])) # add to qc header lists # vale: array of orders where it fails qc_names.append('WAVE DIFF ALONG ORDER HC') qc_logic.append('WAVE DIFF ALONG ORDER < 0') # ---------------------------------------------------------------------- # finally log the failed messages and set QC = 1 if we pass the # quality control QC = 0 if we fail quality control if passed: WLOG(p, 'info', 'QUALITY CONTROL SUCCESSFUL - Well Done -') p['QC'] = 1 p.set_source('QC', __NAME__ + '/main()') else: for farg in fail_msg: wmsg = 'QUALITY CONTROL FAILED: {0}' WLOG(p, 'warning', wmsg.format(farg)) p['QC'] = 0 p.set_source('QC', __NAME__ + '/main()') # store in qc_params qc_params = [qc_names, qc_values, qc_logic, qc_pass] # ---------------------------------------------------------------------- # log the global stats # ---------------------------------------------------------------------- # calculate catalog-fit residuals in km/s res_hc = [] sumres_hc = 0.0 sumres2_hc = 0.0 for order in range(loc['NBO']): # get HC line wavelengths for the order order_mask = loc['ORD_T'] == order hc_x_ord = loc['XGAU_T'][order_mask] hc_ll_ord = np.polyval(loc['POLY_WAVE_SOL'][order][::-1], hc_x_ord) hc_ll_cat = loc['WAVE_CATALOG'][order_mask] hc_ll_diff = hc_ll_ord - hc_ll_cat res_hc.append(hc_ll_diff * speed_of_light / hc_ll_cat) sumres_hc += np.nansum(res_hc[order]) sumres2_hc += np.nansum(res_hc[order]**2) total_lines_hc = len(np.concatenate(res_hc)) final_mean_hc = sumres_hc / total_lines_hc final_var_hc = (sumres2_hc / total_lines_hc) - (final_mean_hc**2) wmsg1 = 'On fiber {0} HC fit line statistic:'.format(p['FIBER']) wargs2 = [ final_mean_hc * 1000.0, np.sqrt(final_var_hc) * 1000.0, total_lines_hc, 1000.0 * np.sqrt(final_var_hc / total_lines_hc) ] wmsg2 = ('\tmean={0:.3f}[m/s] rms={1:.1f} {2} HC lines (error on mean ' 'value:{3:.4f}[m/s])'.format(*wargs2)) WLOG(p, 'info', [wmsg1, wmsg2]) # ---------------------------------------------------------------------- # Save wave map to file # ---------------------------------------------------------------------- # TODO single file-naming function? 
Ask Neil # get base input filenames bfilenames = [] for raw_file in p['ARG_FILE_NAMES']: bfilenames.append(os.path.basename(raw_file)) # get wave filename wavefits, tag1 = spirouConfig.Constants.WAVE_FILE_EA(p) wavefitsname = os.path.basename(wavefits) # log progress WLOG(p, '', 'Saving wave map to {0}'.format(wavefitsname)) # log progress wargs = [p['FIBER'], wavefitsname] wmsg = 'Write wavelength solution for Fiber {0} in {1}' WLOG(p, '', wmsg.format(*wargs)) # write solution to fitsfilename header # copy original keys hdict = spirouImage.CopyOriginalKeys(loc['HCHDR'], loc['HCCDR']) # set the version hdict = spirouImage.AddKey(p, hdict, p['KW_VERSION']) # TODO add DRS_DATE and DRS_NOW hdict = spirouImage.AddKey(p, hdict, p['KW_PID'], value=p['PID']) hdict = spirouImage.AddKey(p, hdict, p['KW_OUTPUT'], value=tag1) # set the input files hdict = spirouImage.AddKey(p, hdict, p['KW_CDBBAD'], value=p['BLAZFILE']) # add qc parameters hdict = spirouImage.AddKey(p, hdict, p['KW_DRS_QC'], value=p['QC']) hdict = spirouImage.AddQCKeys(p, hdict, qc_params) # add wave solution date hdict = spirouImage.AddKey(p, hdict, p['KW_WAVE_TIME1'], value=p['MAX_TIME_HUMAN']) hdict = spirouImage.AddKey(p, hdict, p['KW_WAVE_TIME2'], value=p['MAX_TIME_UNIX']) hdict = spirouImage.AddKey(p, hdict, p['KW_WAVE_CODE'], value=__NAME__) hdict = spirouImage.AddKey(p, hdict, p['KW_CDBWAVE'], value=loc['WAVEFILE']) hdict = spirouImage.AddKey(p, hdict, p['KW_WAVESOURCE'], value=loc['WSOURCE']) hdict = spirouImage.AddKey1DList(p, hdict, p['KW_INFILE1'], dim1name='file', values=p['ARG_FILE_NAMES']) # add number of orders hdict = spirouImage.AddKey(p, hdict, p['KW_WAVE_ORD_N'], value=loc['POLY_WAVE_SOL'].shape[0]) # add degree of fit hdict = spirouImage.AddKey(p, hdict, p['KW_WAVE_LL_DEG'], value=loc['POLY_WAVE_SOL'].shape[1] - 1) # add wave solution hdict = spirouImage.AddKey2DList(p, hdict, p['KW_WAVE_PARAM'], values=loc['POLY_WAVE_SOL']) # write the wave "spectrum" p = spirouImage.WriteImage(p, wavefits, loc['WAVE_MAP2'], hdict) # get filename for E2DS calibDB copy of FITSFILENAME e2dscopy_filename, tag2 = spirouConfig.Constants.WAVE_E2DS_COPY(p) wargs = [p['FIBER'], os.path.split(e2dscopy_filename)[-1]] wmsg = 'Write reference E2DS spectra for Fiber {0} in {1}' WLOG(p, '', wmsg.format(*wargs)) # make a copy of the E2DS file for the calibBD hdict = spirouImage.AddKey(p, hdict, p['KW_OUTPUT'], value=tag2) p = spirouImage.WriteImage(p, e2dscopy_filename, loc['HCDATA'], hdict) # ---------------------------------------------------------------------- # Save resolution and line profiles to file # ---------------------------------------------------------------------- raw_infile = os.path.basename(p['FITSFILENAME']) # get wave filename resfits, tag3 = spirouConfig.Constants.WAVE_RES_FILE_EA(p) resfitsname = os.path.basename(resfits) WLOG(p, '', 'Saving wave resmap to {0}'.format(resfitsname)) # make a copy of the E2DS file for the calibBD # set the version hdict = spirouImage.AddKey(p, hdict, p['KW_VERSION']) # TODO add DRS_DATE and DRS_NOW hdict = spirouImage.AddKey(p, hdict, p['KW_OUTPUT'], value=tag3) # get res data in correct format resdata, hdicts = spirouWAVE2.generate_res_files(p, loc, hdict) # save to file p = spirouImage.WriteImageMulti(p, resfits, resdata, hdicts=hdicts) # ---------------------------------------------------------------------- # Update calibDB # ---------------------------------------------------------------------- if p['QC']: # set the wave key keydb = 'WAVE_{0}'.format(p['FIBER']) # copy wave file to 
calibDB folder spirouDB.PutCalibFile(p, wavefits) # update the master calib DB file with new key spirouDB.UpdateCalibMaster(p, keydb, wavefitsname, loc['HCHDR']) # set the hcref key keydb = 'HCREF_{0}'.format(p['FIBER']) # copy wave file to calibDB folder spirouDB.PutCalibFile(p, e2dscopy_filename) # update the master calib DB file with new key e2dscopyfits = os.path.split(e2dscopy_filename)[-1] spirouDB.UpdateCalibMaster(p, keydb, e2dscopyfits, loc['HCHDR']) # ---------------------------------------------------------------------- # Update header of current files # ---------------------------------------------------------------------- # only copy over if QC passed if p['QC']: rdir = os.path.dirname(wavefits) # loop around hc files and update header with for rawhcfile in p['ARG_FILE_NAMES']: hcfile = os.path.join(rdir, rawhcfile) raw_infilepath1 = os.path.join(p['ARG_FILE_DIR'], hcfile) p = spirouImage.UpdateWaveSolutionHC(p, loc, raw_infilepath1) # ---------------------------------------------------------------------- # HC+FP wavelength solution # ---------------------------------------------------------------------- # check if there's a FP input and if HC solution passed QCs if has_fp and p['QC']: # log that we are doing the FP solution wmsg = 'Now running the combined FP-HC solution, mode = {}' WLOG(p, 'info', wmsg.format(p['WAVE_MODE_FP'])) # do the wavelength solution loc = spirouWAVE2.do_fp_wavesol(p, loc) # ---------------------------------------------------------------------- # Quality control # ---------------------------------------------------------------------- # get parameters ffrom p p['QC_RMS_LITTROW_MAX'] = p['QC_HC_RMS_LITTROW_MAX'] p['QC_DEV_LITTROW_MAX'] = p['QC_HC_DEV_LITTROW_MAX'] # set passed variable and fail message list # passed, fail_msg = True, [] # qc_values, qc_names, qc_logic, qc_pass = [], [], [], [] # ---------------------------------------------------------------------- # check the difference between consecutive orders is always positive # get the differences wave_diff = loc['LL_FINAL'][1:] - loc['LL_FINAL'][:-1] if np.min(wave_diff) < 0: fmsg = 'Negative wavelength difference between orders' fail_msg.append(fmsg) passed = False qc_pass.append(0) else: qc_pass.append(1) # add to qc header lists qc_values.append(np.min(wave_diff)) qc_names.append('MIN WAVE DIFF FP-HC') qc_logic.append('MIN WAVE DIFF < 0') # ---------------------------------------------------------------------- # check for infinites and NaNs in mean residuals from fit if ~np.isfinite(loc['X_MEAN_2']): # add failed message to the fail message list fmsg = 'NaN or Inf in X_MEAN_2' fail_msg.append(fmsg) passed = False qc_pass.append(0) else: qc_pass.append(1) # add to qc header lists qc_values.append(loc['X_MEAN_2']) qc_names.append('X_MEAN_2') qc_logic.append('X_MEAN_2 not finite') # ---------------------------------------------------------------------- # iterate through Littrow test cut values lit_it = 2 # checks every other value # TODO: This QC check (or set of QC checks needs re-writing it is # TODO: nearly impossible to understand for x_it in range(1, len(loc['X_CUT_POINTS_' + str(lit_it)]), 2): # get x cut point x_cut_point = loc['X_CUT_POINTS_' + str(lit_it)][x_it] # get the sigma for this cut point sig_littrow = loc['LITTROW_SIG_' + str(lit_it)][x_it] # get the abs min and max dev littrow values min_littrow = abs(loc['LITTROW_MINDEV_' + str(lit_it)][x_it]) max_littrow = abs(loc['LITTROW_MAXDEV_' + str(lit_it)][x_it]) # get the corresponding order min_littrow_ord = loc['LITTROW_MINDEVORD_' 
+ str(lit_it)][x_it] max_littrow_ord = loc['LITTROW_MAXDEVORD_' + str(lit_it)][x_it] # check if sig littrow is above maximum rms_littrow_max = p['QC_RMS_LITTROW_MAX'] dev_littrow_max = p['QC_DEV_LITTROW_MAX'] if sig_littrow > rms_littrow_max: fmsg = ('Littrow test (x={0}) failed (sig littrow = ' '{1:.2f} > {2:.2f})') fargs = [x_cut_point, sig_littrow, rms_littrow_max] fail_msg.append(fmsg.format(*fargs)) passed = False qc_pass.append(0) else: qc_pass.append(1) # add to qc header lists qc_values.append(sig_littrow) qc_names.append('sig_littrow') qc_logic.append('sig_littrow > {0:.2f}'.format(rms_littrow_max)) # ---------------------------------------------------------------------- # check if min/max littrow is out of bounds if np.max([max_littrow, min_littrow]) > dev_littrow_max: fmsg = ('Littrow test (x={0}) failed (min|max dev = ' '{1:.2f}|{2:.2f} > {3:.2f} for order {4}|{5})') fargs = [ x_cut_point, min_littrow, max_littrow, dev_littrow_max, min_littrow_ord, max_littrow_ord ] fail_msg.append(fmsg.format(*fargs)) passed = False qc_pass.append(0) # TODO: Should this be the QC header values? # TODO: it does not change the outcome of QC (i.e. passed=False) # TODO: So what is the point? # if sig was out of bounds, recalculate if sig_littrow > rms_littrow_max: # conditions check1 = min_littrow > dev_littrow_max check2 = max_littrow > dev_littrow_max # get the residuals respix = loc['LITTROW_YY_' + str(lit_it)][x_it] # check if both are out of bounds if check1 and check2: # remove respective orders worst_order = (min_littrow_ord, max_littrow_ord) respix_2 = np.delete(respix, worst_order) redo_sigma = True # check if min is out of bounds elif check1: # remove respective order worst_order = min_littrow_ord respix_2 = np.delete(respix, worst_order) redo_sigma = True # check if max is out of bounds elif check2: # remove respective order worst_order = max_littrow_ord respix_2 = np.delete(respix, max_littrow_ord) redo_sigma = True # else do not recalculate sigma else: redo_sigma, respix_2, worst_order = False, None, None wmsg = 'No outlying orders, sig littrow not recalculated' fail_msg.append(wmsg.format()) # if outlying order, recalculate stats if redo_sigma: mean = np.nansum(respix_2) / len(respix_2) mean2 = np.nansum(respix_2**2) / len(respix_2) rms = np.sqrt(mean2 - mean**2) if rms > rms_littrow_max: fmsg = ( 'Littrow test (x={0}) failed (sig littrow = ' '{1:.2f} > {2:.2f} removing order {3})') fargs = [ x_cut_point, rms, rms_littrow_max, worst_order ] fail_msg.append(fmsg.format(*fargs)) else: wargs = [ x_cut_point, rms, rms_littrow_max, worst_order ] wmsg = ( 'Littrow test (x={0}) passed (sig littrow = ' '{1:.2f} > {2:.2f} removing order {3})') fail_msg.append(wmsg.format(*wargs)) else: qc_pass.append(1) # add to qc header lists qc_values.append(np.max([max_littrow, min_littrow])) qc_names.append('max or min littrow') qc_logic.append('max or min littrow > {0:.2f}' ''.format(dev_littrow_max)) # finally log the failed messages and set QC = 1 if we pass the # quality control QC = 0 if we fail quality control if passed: WLOG(p, 'info', 'QUALITY CONTROL SUCCESSFUL - Well Done -') p['QC'] = 1 p.set_source('QC', __NAME__ + '/main()') else: for farg in fail_msg: wmsg = 'QUALITY CONTROL FAILED: {0}' WLOG(p, 'warning', wmsg.format(farg)) p['QC'] = 0 p.set_source('QC', __NAME__ + '/main()') # store in qc_params qc_params = [qc_names, qc_values, qc_logic, qc_pass] # ------------------------------------------------------------------ # archive result in e2ds spectra # 
------------------------------------------------------------------ # get raw input file name(s) raw_infiles1 = [] for hcfile in p['HCFILES']: raw_infiles1.append(os.path.basename(hcfile)) raw_infile2 = os.path.basename(p['FPFILE']) # get wave filename wavefits, tag1 = spirouConfig.Constants.WAVE_FILE_EA_2(p) wavefitsname = os.path.split(wavefits)[-1] # log progress wargs = [p['FIBER'], wavefits] wmsg = 'Write wavelength solution for Fiber {0} in {1}' WLOG(p, '', wmsg.format(*wargs)) # write solution to fitsfilename header # copy original keys hdict = spirouImage.CopyOriginalKeys(loc['HCHDR'], loc['HCCDR']) # add version number hdict = spirouImage.AddKey(p, hdict, p['KW_VERSION']) hdict = spirouImage.AddKey(p, hdict, p['KW_PID'], value=p['PID']) # set the input files hdict = spirouImage.AddKey(p, hdict, p['KW_CDBBAD'], value=p['BLAZFILE']) hdict = spirouImage.AddKey(p, hdict, p['KW_CDBWAVE'], value=loc['WAVEFILE']) hdict = spirouImage.AddKey(p, hdict, p['KW_WAVESOURCE'], value=loc['WSOURCE']) hdict = spirouImage.AddKey1DList(p, hdict, p['KW_INFILE1'], dim1name='fpfile', values=p['FPFILE']) hdict = spirouImage.AddKey1DList(p, hdict, p['KW_INFILE2'], dim1name='hcfile', values=p['HCFILES']) # add qc parameters hdict = spirouImage.AddKey(p, hdict, p['KW_DRS_QC'], value=p['QC']) hdict = spirouImage.AddQCKeys(p, hdict, qc_params) # add wave solution date hdict = spirouImage.AddKey(p, hdict, p['KW_WAVE_TIME1'], value=p['MAX_TIME_HUMAN']) hdict = spirouImage.AddKey(p, hdict, p['KW_WAVE_TIME2'], value=p['MAX_TIME_UNIX']) hdict = spirouImage.AddKey(p, hdict, p['KW_WAVE_CODE'], value=__NAME__) # add number of orders hdict = spirouImage.AddKey(p, hdict, p['KW_WAVE_ORD_N'], value=loc['LL_PARAM_FINAL'].shape[0]) # add degree of fit hdict = spirouImage.AddKey(p, hdict, p['KW_WAVE_LL_DEG'], value=loc['LL_PARAM_FINAL'].shape[1] - 1) # add wave solution hdict = spirouImage.AddKey2DList(p, hdict, p['KW_WAVE_PARAM'], values=loc['LL_PARAM_FINAL']) # add FP CCF drift # target RV and width hdict = spirouImage.AddKey(p, hdict, p['KW_WFP_TARG_RV'], value=p['TARGET_RV']) hdict = spirouImage.AddKey(p, hdict, p['KW_WFP_WIDTH'], value=p['CCF_WIDTH']) # the rv step # rvstep = np.abs(loc['RV_CCF'][0] - loc['RV_CCF'][1]) # hdict = spirouImage.AddKey(p, hdict, p['KW_CCF_CDELT'], value=rvstep) hdict = spirouImage.AddKey(p, hdict, p['KW_WFP_STEP'], value=p['CCF_STEP']) # add ccf stats hdict = spirouImage.AddKey(p, hdict, p['KW_WFP_DRIFT'], value=loc['CCF_RES'][1]) hdict = spirouImage.AddKey(p, hdict, p['KW_WFP_FWHM'], value=loc['FWHM']) hdict = spirouImage.AddKey(p, hdict, p['KW_WFP_CONTRAST'], value=loc['CONTRAST']) hdict = spirouImage.AddKey(p, hdict, p['KW_WFP_MAXCPP'], value=loc['MAXCPP']) hdict = spirouImage.AddKey(p, hdict, p['KW_WFP_MASK'], value=p['CCF_MASK']) hdict = spirouImage.AddKey(p, hdict, p['KW_WFP_LINES'], value=np.nansum(loc['TOT_LINE'])) # write the wave "spectrum" hdict = spirouImage.AddKey(p, hdict, p['KW_OUTPUT'], value=tag1) p = spirouImage.WriteImage(p, wavefits, loc['LL_FINAL'], hdict) # get filename for E2DS calibDB copy of FITSFILENAME e2dscopy_filename = spirouConfig.Constants.WAVE_E2DS_COPY(p)[0] wargs = [p['FIBER'], os.path.split(e2dscopy_filename)[-1]] wmsg = 'Write reference E2DS spectra for Fiber {0} in {1}' WLOG(p, '', wmsg.format(*wargs)) # make a copy of the E2DS file for the calibBD p = spirouImage.WriteImage(p, e2dscopy_filename, loc['HCDATA'], hdict) # only copy over if QC passed if p['QC']: # loop around hc files and update header with for hcfile in p['HCFILES']: raw_infilepath1 = 
os.path.join(p['ARG_FILE_DIR'], hcfile) p = spirouImage.UpdateWaveSolution(p, loc, raw_infilepath1) # update fp file raw_infilepath2 = os.path.join(p['ARG_FILE_DIR'], raw_infile2) p = spirouImage.UpdateWaveSolution(p, loc, raw_infilepath2) # ------------------------------------------------------------------ # Save to result table # ------------------------------------------------------------------ # calculate stats for table final_mean = 1000 * loc['X_MEAN_2'] final_var = 1000 * loc['X_VAR_2'] num_lines = loc['TOTAL_LINES_2'] err = 1000 * np.sqrt(loc['X_VAR_2'] / num_lines) sig_littrow = 1000 * np.array(loc['LITTROW_SIG_' + str(lit_it)]) # construct filename wavetbl = spirouConfig.Constants.WAVE_TBL_FILE_EA(p) wavetblname = os.path.basename(wavetbl) # construct and write table columnnames = [ 'night_name', 'file_name', 'fiber', 'mean', 'rms', 'N_lines', 'err', 'rms_L500', 'rms_L1000', 'rms_L1500', 'rms_L2000', 'rms_L2500', 'rms_L3000', 'rms_L3500' ] columnformats = [ '{:20s}', '{:30s}', '{:3s}', '{:7.4f}', '{:6.2f}', '{:3d}', '{:6.3f}', '{:6.2f}', '{:6.2f}', '{:6.2f}', '{:6.2f}', '{:6.2f}', '{:6.2f}', '{:6.2f}' ] columnvalues = [[p['ARG_NIGHT_NAME']], [p['ARG_FILE_NAMES'][0]], [p['FIBER']], [final_mean], [final_var], [num_lines], [err], [sig_littrow[0]], [sig_littrow[1]], [sig_littrow[2]], [sig_littrow[3]], [sig_littrow[4]], [sig_littrow[5]], [sig_littrow[6]]] # make table table = spirouImage.MakeTable(p, columns=columnnames, values=columnvalues, formats=columnformats) # merge table wmsg = 'Global result summary saved in {0}' WLOG(p, '', wmsg.format(wavetblname)) spirouImage.MergeTable(p, table, wavetbl, fmt='ascii.rst') # ------------------------------------------------------------------ # Save line list table file # ------------------------------------------------------------------ # construct filename # TODO proper column values wavelltbl = spirouConfig.Constants.WAVE_LINE_FILE_EA(p) wavelltblname = os.path.split(wavelltbl)[-1] # construct and write table columnnames = ['order', 'll', 'dv', 'w', 'xi', 'xo', 'dvdx'] columnformats = [ '{:.0f}', '{:12.4f}', '{:13.5f}', '{:12.4f}', '{:12.4f}', '{:12.4f}', '{:8.4f}' ] columnvalues = [] # construct column values (flatten over orders) for it in range(len(loc['X_DETAILS_2'])): for jt in range(len(loc['X_DETAILS_2'][it][0])): row = [ float(it), loc['X_DETAILS_2'][it][0][jt], loc['LL_DETAILS_2'][it][0][jt], loc['X_DETAILS_2'][it][3][jt], loc['X_DETAILS_2'][it][1][jt], loc['X_DETAILS_2'][it][2][jt], loc['SCALE_2'][it][jt] ] columnvalues.append(row) # log saving wmsg = 'List of lines used saved in {0}' WLOG(p, '', wmsg.format(wavelltblname)) # make table columnvalues = np.array(columnvalues).T table = spirouImage.MakeTable(p, columns=columnnames, values=columnvalues, formats=columnformats) # write table spirouImage.WriteTable(p, table, wavelltbl, fmt='ascii.rst') # ------------------------------------------------------------------ # Move to calibDB and update calibDB # ------------------------------------------------------------------ if p['QC']: # set the wave key keydb = 'WAVE_{0}'.format(p['FIBER']) # copy wave file to calibDB folder spirouDB.PutCalibFile(p, wavefits) # update the master calib DB file with new key spirouDB.UpdateCalibMaster(p, keydb, wavefitsname, loc['HCHDR']) # set the hcref key keydb = 'HCREF_{0}'.format(p['FIBER']) # copy wave file to calibDB folder spirouDB.PutCalibFile(p, e2dscopy_filename) # update the master calib DB file with new key e2dscopyfits = os.path.split(e2dscopy_filename)[-1] spirouDB.UpdateCalibMaster(p, 
keydb, e2dscopyfits, loc['HCHDR']) # If the HC solution failed QCs we do not compute FP-HC solution elif has_fp and not p['QC']: wmsg = 'HC solution failed quality controls; FP not processed' WLOG(p, 'warning', wmsg) # If there is no FP file we log that elif not has_fp: wmsg = 'No FP file given; FP-HC combined solution cannot be generated' WLOG(p, 'warning', wmsg) # ---------------------------------------------------------------------- # End Message # ---------------------------------------------------------------------- p = spirouStartup.End(p) # return p and loc return dict(locals())
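# A stripped-down sketch of the HC line-statistics bookkeeping in the recipe
# above: per-order catalog-fit residuals (in km/s) are accumulated as running
# sums, and the mean, variance and error on the mean follow from sumres and
# sumres2. The residual values here are synthetic.
import numpy as np

res_per_order = [np.array([0.01, -0.02, 0.005]),
                 np.array([0.0, 0.015, -0.01, 0.02])]

sumres = sum(np.nansum(r) for r in res_per_order)
sumres2 = sum(np.nansum(r ** 2) for r in res_per_order)
total_lines = len(np.concatenate(res_per_order))

mean = sumres / total_lines
var = sumres2 / total_lines - mean ** 2
err_on_mean = np.sqrt(var / total_lines)

print('mean=%.3f[m/s] rms=%.1f error on mean=%.4f[m/s]'
      % (mean * 1000.0, np.sqrt(var) * 1000.0, err_on_mean * 1000.0))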
class ITBFatalityFunctionConfigurable(FunctionProvider): """Indonesian Earthquake Fatality Model This model was developed by Institut Teknologi Bandung (ITB) and implemented by Dr. Hadi Ghasemi, Geoscience Australia. Reference: Indonesian Earthquake Building-Damage and Fatality Models and Post Disaster Survey Guidelines Development, Bali, 27-28 February 2012, 54pp. Algorithm: In this study, the same functional form as Allen (2009) is adopted to express fatality rate as a function of intensity (see Eq. 10 in the report). The Matlab built-in function (fminsearch) for Nelder-Mead algorithm is used to estimate the model parameters. The objective function (L2G norm) that is minimised during the optimisation is the same as the one used by Jaiswal et al. (2010). The coefficients used in the indonesian model are x=0.62275231, y=8.03314466, zeta=2.15 Allen, T. I., Wald, D. J., Earle, P. S., Marano, K. D., Hotovec, A. J., Lin, K., and Hearne, M., 2009. An Atlas of ShakeMaps and population exposure catalog for earthquake loss modeling, Bull. Earthq. Eng. 7, 701-718. Jaiswal, K., and Wald, D., 2010. An empirical model for global earthquake fatality estimation, Earthq. Spectra 26, 1017-1037. Caveats and limitations: The current model is the result of the above mentioned workshop and reflects the best available information. However, the current model has a number of issues listed below and is expected to evolve further over time. 1 - The model is based on limited number of observed fatality rates during 4 past fatal events. 2 - The model clearly over-predicts the fatality rates at intensities higher than VIII. 3 - The model only estimates the expected fatality rate for a given intensity level; however the associated uncertainty for the proposed model is not addressed. 4 - There are few known mistakes in developing the current model: - rounding MMI values to the nearest 0.5, - Implementing Finite-Fault models of candidate events, and - consistency between selected GMPEs with those in use by BMKG. These issues will be addressed by ITB team in the final report. 
:author Hadi Ghasemi :rating 3 :param requires category=='hazard' and \ subcategory=='earthquake' and \ layertype=='raster' and \ unit=='MMI' :param requires category=='exposure' and \ subcategory=='population' and \ layertype=='raster' """ title = tr('Die or be displaced') defaults = get_defaults() parameters = OrderedDict([ ('x', 0.62275231), ('y', 8.03314466), # Model coefficients # Rates of people displaced for each MMI level ('displacement_rate', {1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 1.0, 7: 1.0, 8: 1.0, 9: 1.0, 10: 1.0}), # Threshold below which layer should be transparent ('tolerance', 0.01), ('calculate_displaced_people', True), ('postprocessors', OrderedDict([ ('Gender', {'on': True}), ('Age', { 'on': True, 'params': OrderedDict([ ('youth_ratio', defaults['YOUTH_RATIO']), ('adult_ratio', defaults['ADULT_RATIO']), ('elder_ratio', defaults['ELDER_RATIO'])])}), ('MinimumNeeds', {'on': True})]))]) def run(self, layers): """Indonesian Earthquake Fatality Model Input layers: List of layers expected to contain H: Raster layer of MMI ground shaking P: Raster layer of population density """ # Establish model coefficients x = self.parameters['x'] y = self.parameters['y'] # Define percentages of people being displaced at each mmi level displacement_rate = self.parameters['displacement_rate'] # Tolerance for transparency tolerance = self.parameters['tolerance'] # Extract input layers intensity = get_hazard_layer(layers) population = get_exposure_layer(layers) question = get_question(intensity.get_name(), population.get_name(), self) # Extract data grids H = intensity.get_data() # Ground Shaking P = population.get_data(scaling=True) # Population Density # Calculate population affected by each MMI level # FIXME (Ole): this range is 2-9. Should 10 be included? mmi_range = range(2, 10) number_of_exposed = {} number_of_displaced = {} number_of_fatalities = {} # Calculate fatality rates for observed Intensity values (H # based on ITB power model R = numpy.zeros(H.shape) for mmi in mmi_range: # Identify cells where MMI is in class i mask = (H > mmi - 0.5) * (H <= mmi + 0.5) # Count population affected by this shake level I = numpy.where(mask, P, 0) # Calculate expected number of fatalities per level fatality_rate = numpy.power(10.0, x * mmi - y) F = fatality_rate * I # Calculate expected number of displaced people per level try: D = displacement_rate[mmi] * I except KeyError, e: msg = 'mmi = %i, I = %s, Error msg: %s' % (mmi, str(I), str(e)) raise InaSAFEError(msg) # Adjust displaced people to disregard fatalities. # Set to zero if there are more fatalities than displaced. D = numpy.where(D > F, D - F, 0) # Sum up numbers for map R += D # Displaced # Generate text with result for this study # This is what is used in the real time system exposure table number_of_exposed[mmi] = numpy.nansum(I.flat) number_of_displaced[mmi] = numpy.nansum(D.flat) number_of_fatalities[mmi] = numpy.nansum(F.flat) # Set resulting layer to NaN when less than a threshold. This is to # achieve transparency (see issue #126). 
R[R < tolerance] = numpy.nan # Total statistics total = int(round(numpy.nansum(P.flat) / 1000) * 1000) # Compute number of fatalities fatalities = int(round(numpy.nansum(number_of_fatalities.values()) / 1000)) * 1000 # Compute number of people displaced due to building collapse displaced = int(round(numpy.nansum(number_of_displaced.values()) / 1000)) * 1000 # Generate impact report table_body = [question] # Add total fatality estimate s = str(int(fatalities)).rjust(10) table_body.append(TableRow([tr('Number of fatalities'), s], header=True)) if self.parameters['calculate_displaced_people']: # Add total estimate of people displaced s = str(int(displaced)).rjust(10) table_body.append(TableRow([tr('Number of people displaced'), s], header=True)) else: displaced = 0 # Add estimate of total population in area s = str(int(total)).rjust(10) table_body.append(TableRow([tr('Total number of people'), s], header=True)) # Calculate estimated needs based on BNPB Perka 7/2008 minimum bantuan rice = displaced * 2.8 drinking_water = displaced * 17.5 water = displaced * 67 family_kits = displaced / 5 toilets = displaced / 20 # Generate impact report for the pdf map table_body = [question, TableRow([tr('Fatalities'), '%i' % fatalities], header=True), TableRow([tr('People displaced'), '%i' % displaced], header=True), TableRow(tr('Map shows density estimate of ' 'displaced population')), TableRow([tr('Needs per week'), tr('Total')], header=True), [tr('Rice [kg]'), int(rice)], [tr('Drinking Water [l]'), int(drinking_water)], [tr('Clean Water [l]'), int(water)], [tr('Family Kits'), int(family_kits)], [tr('Toilets'), int(toilets)]] impact_table = Table(table_body).toNewlineFreeString() table_body.append(TableRow(tr('Action Checklist:'), header=True)) if fatalities > 0: table_body.append(tr('Are there enough victim identification ' 'units available for %i people?') % fatalities) if displaced > 0: table_body.append(tr('Are there enough shelters and relief items ' 'available for %i people?') % displaced) table_body.append(TableRow(tr('If yes, where are they located and ' 'how will we distribute them?'))) table_body.append(TableRow(tr('If no, where can we obtain ' 'additional relief items from and ' 'how will we transport them?'))) # Extend impact report for on-screen display table_body.extend([TableRow(tr('Notes'), header=True), tr('Total population: %i') % total, tr('People are considered to be displaced if ' 'they experience and survive a shake level' 'of more than 5 on the MMI scale '), tr('Minimum needs are defined in BNPB ' 'regulation 7/2008')]) impact_summary = Table(table_body).toNewlineFreeString() map_title = tr('People in need of evacuation') table_body.append(TableRow(tr('Notes'), header=True)) table_body.append(tr('Fatality model is from ' 'Institute of Teknologi Bandung 2012.')) table_body.append(tr('Population numbers rounded to nearest 1000.')) impact_summary = Table(table_body).toNewlineFreeString() impact_table = impact_summary map_title = tr('Earthquake impact to population') # Create style info dynamically classes = numpy.linspace(numpy.nanmin(R.flat[:]), numpy.nanmax(R.flat[:]), 5) style_classes = [dict(colour='#EEFFEE', quantity=classes[0], transparency=100, label=tr('%.2f people/cell') % classes[0]), dict(colour='#FFFF7F', quantity=classes[1], transparency=30), dict(colour='#E15500', quantity=classes[2], transparency=30, label=tr('%.2f people/cell') % classes[2]), dict(colour='#E4001B', quantity=classes[3], transparency=30), dict(colour='#730000', quantity=classes[4], transparency=30, 
label=tr('%.2f people/cell') % classes[4])] style_info = dict(target_field=None, style_classes=style_classes) # Create new layer and return L = Raster(R, projection=population.get_projection(), geotransform=population.get_geotransform(), keywords={'impact_summary': impact_summary, 'total_population': total, 'total_fatalities': fatalities, 'impact_table': impact_table, 'map_title': map_title}, name=tr('Estimated displaced population'), style_info=style_info) # Maybe return a shape file with contours instead return L
def distribute_thickness_per_altitude(gdir, add_slope=True, smooth_radius=None, dis_from_border_exp=0.25, varname_suffix=''): """Compute a thickness map by redistributing mass along altitudinal bands. This is a rather cosmetic task, not relevant for OGGM but for ITMIX. Parameters ---------- gdir : :py:class:`oggm.GlacierDirectory` the glacier directory to process add_slope : bool whether a corrective slope factor should be used or not smooth_radius : int pixel size of the gaussian smoothing. Default is to use cfg.PARAMS['smooth_window'] (i.e. a size in meters). Set to zero to suppress smoothing. dis_from_border_exp : float the exponent of the distance from border mask varname_suffix : str add a suffix to the variable written in the file (for experiments) """ # Variables grids_file = gdir.get_filepath('gridded_data') # See if we have the masks, else compute them with utils.ncDataset(grids_file) as nc: has_masks = 'glacier_ext_erosion' in nc.variables if not has_masks: from oggm.core.gis import gridded_attributes gridded_attributes(gdir) with utils.ncDataset(grids_file) as nc: topo_smoothed = nc.variables['topo_smoothed'][:] glacier_mask = nc.variables['glacier_mask'][:] dis_from_border = nc.variables['dis_from_border'][:] if add_slope: slope_factor = nc.variables['slope_factor'][:] else: slope_factor = 1. # Along the lines cls = gdir.read_pickle('inversion_output') fls = gdir.read_pickle('inversion_flowlines') hs, ts, vs, xs, ys = [], [], [], [], [] for cl, fl in zip(cls, fls): hs = np.append(hs, fl.surface_h) ts = np.append(ts, cl['thick']) vs = np.append(vs, cl['volume']) x, y = fl.line.xy xs = np.append(xs, x) ys = np.append(ys, y) init_vol = np.sum(vs) # Assign a first order thickness to the points # very inefficient inverse distance stuff thick = glacier_mask * np.NaN for y in range(thick.shape[0]): for x in range(thick.shape[1]): phgt = topo_smoothed[y, x] # take the ones in a 100m range starth = 100. while True: starth += 10 pok = np.nonzero(np.abs(phgt - hs) <= starth)[0] if len(pok) != 0: break sqr = np.sqrt((xs[pok] - x)**2 + (ys[pok] - y)**2) pzero = np.where(sqr == 0) if len(pzero[0]) == 0: thick[y, x] = np.average(ts[pok], weights=1 / sqr) elif len(pzero[0]) == 1: thick[y, x] = ts[pzero] else: raise RuntimeError('We should not be there') # Distance from border (normalized) dis_from_border = dis_from_border**dis_from_border_exp dis_from_border /= np.mean(dis_from_border[glacier_mask == 1]) thick *= dis_from_border # Slope thick *= slope_factor # Smooth dx = gdir.grid.dx if smooth_radius != 0: if smooth_radius is None: smooth_radius = np.rint(cfg.PARAMS['smooth_window'] / dx) thick = gaussian_blur(thick, np.int(smooth_radius)) thick = np.where(glacier_mask, thick, 0.) # Re-mask utils.clip_min(thick, 0, out=thick) thick[glacier_mask == 0] = np.NaN assert np.all(np.isfinite(thick[glacier_mask == 1])) # Conserve volume tmp_vol = np.nansum(thick * dx**2) thick *= init_vol / tmp_vol # write with utils.ncDataset(grids_file, 'a') as nc: vn = 'distributed_thickness' + varname_suffix if vn in nc.variables: v = nc.variables[vn] else: v = nc.createVariable(vn, 'f4', ( 'y', 'x', ), zlib=True) v.units = '-' v.long_name = 'Distributed ice thickness' v[:] = thick return thick
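# The volume-conservation step at the end of the function above, isolated as a
# small sketch: the distributed thickness map is rescaled so that its integral
# over the grid matches the inverted volume. Grid size, spacing and thickness
# values are synthetic.
import numpy as np

dx = 100.0                                   # grid spacing [m]
init_vol = 2.5e8                             # inverted glacier volume [m3]

thick = np.full((40, 40), np.nan)            # off-glacier cells stay NaN
thick[10:30, 10:30] = 50.0                   # first-guess thickness [m]

tmp_vol = np.nansum(thick * dx ** 2)         # current map volume [m3]
thick *= init_vol / tmp_vol                  # enforce volume conservation

assert np.isclose(np.nansum(thick * dx ** 2), init_vol)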
MSE_t = np.mean(h_convs[p_outi - 1:p_outi + 1])
s_BL = np.nanmean(s_convs[:BLi])
s_t = np.mean(s_convs[p_outi - 1:p_outi + 1])
delhtrop = MSE_BL - MSE_t
delstrop = s_BL - s_t

# Recompute using the single outflow level (overrides the layer-averaged
# values above)
MSE_BL = np.nanmean(h_convs[:BLi])
MSE_t = h_convs[p_outi]
s_BL = np.nanmean(s_convs[:BLi])
s_t = s_convs[p_outi]
delhtrop = MSE_BL - MSE_t
delstrop = s_BL - s_t

NGMS = np.nansum(divh * delz) / np.nansum(divs * delz)

# print('vertically averaged sigma', sigmabar)
# print('vertically averaged l_m (km)', l_mbar)
# print('vertically averaged v_n (m/s)', v_nbar)
# print('vertically averaged mass flux (kg/s)', np.mean(divs))
print('vertically averaged divergence of MSE (W/m^2)', np.nanmean(divh))
print('vertical integral of MSE divergence (W/m^2)', np.nansum(divh * delz))
print('vertical integral of DSE divergence (W/m^2)', np.nansum(divs * delz))
print('Normalized GMS', NGMS)
print('vertically averaged mass flux magnitude (kg/m^2)', divbar)
print('delh_trop', delhtrop)
print('MSE_BL', MSE_BL)
print('MSE_t', MSE_t)
print('dels_trop', delstrop)
print('s_BL', s_BL)
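The NGMS line above defines the normalized gross moist stability as the vertical integral of the MSE divergence divided by the vertical integral of the DSE divergence, with NaN-aware sums so masked levels drop out. A short, self-contained illustration of that ratio on synthetic profiles (all arrays and numbers below are invented for the example):

import numpy as np

# Synthetic column: 20 levels with layer thickness delz (m), plus made-up
# divergence profiles of moist static energy (divh) and dry static
# energy (divs) in W m^-3.
delz = np.full(20, 500.)
z = np.cumsum(delz)
divh = 1e-3 * np.exp(-z / 4000.) - 4e-4
divs = 8e-4 * np.exp(-z / 6000.)

# Vertical integrals (NaN-aware, e.g. for below-ground levels)
int_divh = np.nansum(divh * delz)
int_divs = np.nansum(divs * delz)
ngms = int_divh / int_divs
print('Normalized GMS:', ngms)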
        con.done()
    else:
        values.append(
            img.pixelvalue(img.summary()['refpix'])['value']['value'] * 1e3)
        noise.append(img.statistics()['rms'][0] * 1e3)
        img.done()

vals = np.array(values).reshape((36, 100))
rms = np.array(noise).reshape((36, 100))

stacked_vals = []
error = []
for i, j in tqdm(zip(vals, rms)):
    # Inverse-variance weighted stack of each row, ignoring NaNs
    stacked_vals.append(np.nansum(i * (1 / j**2)) / np.nansum(1 / j**2))
    error.append(np.sqrt(1 / np.nansum(1 / j**2)))

stacked_vals = np.array(stacked_vals).reshape((4, 9))
error = np.array(error).reshape((4, 9))

os.chdir('/vol/arc2/archive2/ziad/data')
np.save('test_stacks' + set + '.npy', stacked_vals)
np.save('test_errors' + set + '.npy', error)
np.save('test_noise' + set + '.npy', rms)

for set in tqdm(setnum2):
    files = glob.glob('/vol/arc2/archive2/ziad/simulations/set' + set + '/sim_*')
    files.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))
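The stacking loop above is an inverse-variance weighted mean, with the formal uncertainty taken as one over the square root of the summed weights. A standalone sketch of the same weighting on made-up numbers, masking non-finite entries explicitly:

import numpy as np

# Made-up per-image peak fluxes (mJy) and rms noise (mJy); a NaN marks an
# image that could not be measured.
flux = np.array([1.2, np.nan, 0.8, 1.5])
rms = np.array([0.30, 0.20, 0.40, 0.35])

good = np.isfinite(flux) & np.isfinite(rms)
w = 1.0 / rms[good]**2

stacked = np.sum(flux[good] * w) / np.sum(w)   # inverse-variance weighted mean
stacked_err = np.sqrt(1.0 / np.sum(w))         # formal error of the stack
print(stacked, '+/-', stacked_err)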
def distribute_thickness_interp(gdir, add_slope=True, smooth_radius=None,
                                varname_suffix=''):
    """Compute a thickness map by interpolating between centerlines and border.

    IMPORTANT: this is NOT what has been used for ITMIX. We used
    distribute_thickness_per_altitude for ITMIX and global ITMIX.

    This is a rather cosmetic task, not relevant for OGGM but for ITMIX.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    add_slope : bool
        whether a corrective slope factor should be used or not
    smooth_radius : int
        pixel size of the gaussian smoothing. Default is to use
        cfg.PARAMS['smooth_window'] (i.e. a size in meters). Set to zero to
        suppress smoothing.
    varname_suffix : str
        add a suffix to the variable written in the file (for experiments)
    """

    # Variables
    grids_file = gdir.get_filepath('gridded_data')
    # See if we have the masks, else compute them
    with utils.ncDataset(grids_file) as nc:
        has_masks = 'glacier_ext_erosion' in nc.variables
    if not has_masks:
        from oggm.core.gis import gridded_attributes
        gridded_attributes(gdir)

    with utils.ncDataset(grids_file) as nc:
        glacier_mask = nc.variables['glacier_mask'][:]
        glacier_ext = nc.variables['glacier_ext_erosion'][:]
        ice_divides = nc.variables['ice_divides'][:]
        if add_slope:
            slope_factor = nc.variables['slope_factor'][:]
        else:
            slope_factor = 1.

    # Thickness to interpolate
    thick = glacier_ext * np.nan
    thick[(glacier_ext - ice_divides) == 1] = 0.
    # TODO: domain border too, for convenience for a start
    thick[0, :] = 0.
    thick[-1, :] = 0.
    thick[:, 0] = 0.
    thick[:, -1] = 0.

    # Along the lines
    cls = gdir.read_pickle('inversion_output')
    fls = gdir.read_pickle('inversion_flowlines')
    vs = []
    for cl, fl in zip(cls, fls):
        vs.extend(cl['volume'])
        x, y = utils.tuple2int(fl.line.xy)
        thick[y, x] = cl['thick']
    init_vol = np.sum(vs)

    # Interpolate
    xx, yy = gdir.grid.ij_coordinates
    pnan = np.nonzero(~np.isfinite(thick))
    pok = np.nonzero(np.isfinite(thick))
    points = np.array((np.ravel(yy[pok]), np.ravel(xx[pok]))).T
    inter = np.array((np.ravel(yy[pnan]), np.ravel(xx[pnan]))).T
    thick[pnan] = griddata(points, np.ravel(thick[pok]), inter, method='cubic')
    utils.clip_min(thick, 0, out=thick)

    # Slope
    thick *= slope_factor

    # Smooth
    dx = gdir.grid.dx
    if smooth_radius != 0:
        if smooth_radius is None:
            smooth_radius = np.rint(cfg.PARAMS['smooth_window'] / dx)
        thick = gaussian_blur(thick, int(smooth_radius))
        thick = np.where(glacier_mask, thick, 0.)

    # Re-mask
    thick[glacier_mask == 0] = np.nan
    assert np.all(np.isfinite(thick[glacier_mask == 1]))

    # Conserve volume
    tmp_vol = np.nansum(thick * dx**2)
    thick *= init_vol / tmp_vol

    # write
    grids_file = gdir.get_filepath('gridded_data')
    with utils.ncDataset(grids_file, 'a') as nc:
        vn = 'distributed_thickness' + varname_suffix
        if vn in nc.variables:
            v = nc.variables[vn]
        else:
            v = nc.createVariable(vn, 'f4', ('y', 'x', ), zlib=True)
        v.units = '-'
        v.long_name = 'Distributed ice thickness'
        v[:] = thick

    return thick
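The interpolation part of this function follows a common pattern: pin the thickness to zero at the outline and domain border, set known values along the flowlines, fill the gaps with scipy.interpolate.griddata, then rescale so the NaN-aware total volume matches the inverted volume. A toy sketch of that pattern on a synthetic grid (grid size, dx and init_vol are made up, and np.clip stands in for utils.clip_min):

import numpy as np
from scipy.interpolate import griddata

# Toy 20x20 "glacier": thickness known at the outline (0 m) and along a
# central line (parabolic profile); everything else is NaN.
ny, nx = 20, 20
thick = np.full((ny, nx), np.nan)
thick[0, :] = thick[-1, :] = thick[:, 0] = thick[:, -1] = 0.
xs = np.arange(2, nx - 2)
thick[ny // 2, xs] = 100. * (1 - ((xs - nx / 2) / (nx / 2))**2)

# Cubic interpolation of the missing cells from the known ones
yy, xx = np.mgrid[0:ny, 0:nx]
pok = np.isfinite(thick)
thick[~pok] = griddata((yy[pok], xx[pok]), thick[pok],
                       (yy[~pok], xx[~pok]), method='cubic')
thick = np.clip(thick, 0, None)

# Rescale so the total volume matches a target volume (illustrative numbers)
dx = 50.            # grid spacing in metres
init_vol = 5.0e7    # target volume in m^3
thick *= init_vol / np.nansum(thick * dx**2)
print(np.nansum(thick * dx**2))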