def forward(self, dec_input, enc_output, slf_attn_mask=None, dec_enc_attn_mask=None, label_vec=None):
    # Graph-style propagation over the concatenated encoder/decoder states.
    enc_dec_input = torch.cat((enc_output, dec_input), 1)
    support = torch.bmm(enc_dec_input, self.weight.repeat(enc_dec_input.size(0), 1, 1))
    # (enc_dec_mask is computed here but not used below)
    enc_dec_mask = torch.cat((dec_enc_attn_mask, torch.zeros(dec_input.size(1), dec_input.size(1)).cuda()))
    output = torch.bmm(slf_attn_mask.repeat(support.size(0), 1, 1), support)
    if self.bias1 is not None:
        output = output + self.bias1
    if slf_attn_mask is not None:
        # decoder-only propagation with an inverted (0 <-> 1) mask overrides the result above
        slf_attn_mask = torch.zeros(dec_input.size(1), dec_input.size(1)).cuda()
        slf_attn_mask = utils.swap_0_1(slf_attn_mask, 1, 0)
        support = torch.bmm(dec_input, self.weight.repeat(dec_input.size(0), 1, 1))
        output = torch.bmm(slf_attn_mask.repeat(support.size(0), 1, 1), support)
    if self.bias2 is not None:
        return output + self.bias2, None, None
    else:
        return output, None, None
def convert_instance_to_idx_seq(word_insts, word2idx):
    '''Map word instances to index sequences (UNK for out-of-vocabulary words).'''
    # replace missing instances with the end-of-sentence token before lookup
    word_insts = ['</s>' if x is None else x for x in word_insts]
    return [[word2idx.get(w, Constants.UNK) for w in s] for s in word_insts]
def doreg(scene, r, d):
    ## scene(ny,nx), image to be destretched
    ## r, d[2,:,:], reference and actual displacements of control points
    ## B-spline method
    xy = bspline(scene, r, d)
    ans = bilin(scene, xy)  # bi-linear interpolation
    return ans
def downloadProjectBam(project, myAPI, dryRun, samples=[], force=False,
                       qp=QueryParameters.QueryParameters({'Limit': 1024})):
    totalSize = 0
    results = project.getAppResults(myAPI, qp)
    for result in results:
        bams = [x for x in result.getFiles(myAPI, qp) if "bam" in str(x)]
        if samples:
            if isinstance(samples[0], str):
                samples = stringsToBSObj(project.getSamples(myAPI, qp), samples)
            # user picked particular samples; subset the list of bams accordingly
            #bams = [x for x in bams if ] #WIP
            print("\n\nuser picked particular samples, but this isn't coded in yet\n")
            stop()  # debug breakpoint (pdb): halt until sample subsetting is implemented
        savePath = str(project).replace(" ", "_") + "/" + pathFromFile(bams[0], myAPI)
        tmpPath = savePath + "/partial/"
        if not os.path.exists(savePath):
            os.makedirs(savePath)
        if not os.path.exists(tmpPath):
            os.makedirs(tmpPath)
        for fn in bams:
            thisSize = fn.__dict__['Size']
            if dryRun:
                totalSize += thisSize
                print(humanFormat(thisSize) + '\t' + fn.Name)
                continue
            pathToFn = os.path.join(savePath, fn.Name)
            if not force and fileExists(pathToFn, fn):
                print("already have " + pathToFn + ". Skipping...")
                continue
            # if the path exists, append a counter to the name to avoid overwriting
            counter = 1
            while os.path.exists(os.path.join(savePath, fn.Name)):
                fn.Name = os.path.basename(fn.Path) + "." + str(counter)
                counter += 1
            print(os.path.join(savePath, fn.Name))
            totalSize += thisSize
            # download to the temporary "partial" directory, then move into place
            fn.downloadFile(myAPI, tmpPath)
            shutil.move(os.path.join(tmpPath, os.path.split(fn.Path)[1]),
                        os.path.join(savePath, fn.Name))
        if os.path.exists(tmpPath) and not os.listdir(tmpPath):
            os.rmdir(tmpPath)
        if not dryRun:
            downloadProjectMetadata(project, myAPI, samples=samples, outdir=savePath)
    print(humanFormat(totalSize) + '\t' + str(project))
    return totalSize
def perkins_skill(data1, data2, Binsize):
    '''Perkins skill score: the overlap between the two empirical PDFs (1 = identical).'''
    Min = np.nanmin([np.nanmin(data1), np.nanmin(data2)])
    Max = np.nanmax([np.nanmax(data1), np.nanmax(data2)])
    hist, bin_edges = np.histogram(data1[~np.isnan(data1)],
                                   bins=np.arange(Min, Max, Binsize), density=True)
    pdf1 = hist * np.diff(bin_edges)
    # strip NaNs from data2 as well so np.histogram does not fail on missing values
    histEx, bin_edgesEx = np.histogram(data2[~np.isnan(data2)],
                                       bins=np.arange(Min, Max, Binsize), density=True)
    pdf2 = histEx * np.diff(bin_edgesEx)
    mins = np.minimum(pdf1, pdf2)
    ss = np.nansum(mins)
    return ss
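# A minimal usage sketch for perkins_skill, with hypothetical synthetic samples
# (assumes numpy is imported as np, as in the function above):
rng = np.random.default_rng(0)
obs = rng.normal(0.0, 1.0, 5000)  # stand-in for observations
mod = rng.normal(0.3, 1.2, 5000)  # stand-in for model output
# 1.0 means the two empirical PDFs overlap completely; 0.0 means no overlap
print(perkins_skill(obs, mod, 0.25))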
def forward(self, q, k, v, attn_mask=None, stop_sig=False):
    # scaled dot-product attention: attn_type(q @ k^T / temperature) @ v
    attn = torch.bmm(q, k.transpose(1, 2))
    attn = attn / self.temperature
    if attn_mask is not None:
        attn = attn.masked_fill(attn_mask, -np.inf)
    if stop_sig:
        print('**')
        stop()  # debug breakpoint (pdb): inspect the masked attention logits
    attn = self.attn_type(attn)
    #attn = self.dropout(attn)
    output = torch.bmm(attn, v)
    return output, attn
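# A usage sketch for the attention forward pass above. The module instance and its
# temperature/attn_type attributes are assumptions, not from the source (e.g. a
# hypothetical constructor such as attention = ScaledDotProductAttention(16 ** 0.5)
# with attn_type = torch.nn.Softmax(dim=2)):
q = torch.randn(2, 5, 16)  # (batch, queries, d_k)
k = torch.randn(2, 5, 16)  # (batch, keys, d_k)
v = torch.randn(2, 5, 16)  # (batch, keys, d_v)
mask = torch.zeros(2, 5, 5, dtype=torch.bool)  # True marks positions to mask out
output, attn = attention(q, k, v, attn_mask=mask)  # output: (2, 5, 16), attn: (2, 5, 5)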
def bilin(scene, xy):
    ## might be able to use opencv remap function here for the interpolation
    xr = np.arange(0, scene.shape[1])
    yr = np.arange(0, scene.shape[0])
    xxr, yyr = np.meshgrid(yr, xr)
    points = np.zeros((scene.size, 2))
    points[:, 1] = xy[1, :, :].ravel()
    points[:, 0] = xy[0, :, :].ravel()
    values = scene.ravel()
    # note: despite the function name, this resamples with nearest-neighbour lookup
    grid_z0 = griddata(points, values, (xxr, yyr), method='nearest')
    return grid_z0
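# As the comment in bilin suggests, OpenCV's remap can do the same resampling on a
# regular grid. A sketch, assuming xy[0]/xy[1] hold the x/y source coordinates per
# output pixel (cv2 and this coordinate convention are assumptions, not from the source):
import cv2

def bilin_cv(scene, xy):
    map_x = xy[0, :, :].astype(np.float32)
    map_y = xy[1, :, :].astype(np.float32)
    # samples scene at (map_x[i, j], map_y[i, j]) with true bilinear interpolation
    return cv2.remap(scene.astype(np.float32), map_x, map_y, interpolation=cv2.INTER_LINEAR)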
def build_vocab_idx(word_insts, min_word_count, use_bos_eos=True):
    '''Trim the vocabulary by number of occurrences.'''
    full_vocab = set(w for sent in word_insts for w in sent)
    print('[Info] Original Vocabulary size =', len(full_vocab))
    if use_bos_eos:
        word2idx = {
            Constants.BOS_WORD: Constants.BOS,
            Constants.EOS_WORD: Constants.EOS,
            Constants.PAD_WORD: Constants.PAD,
            Constants.UNK_WORD: Constants.UNK}
    else:
        word2idx = {}
    word_count = {w: 0 for w in full_vocab}
    for sent in word_insts:
        for word in sent:
            word_count[word] += 1
    ignored_word_count = 0
    # keep the most frequent words first so they get the smallest indices
    word_count = collections.OrderedDict(
        sorted(word_count.items(), key=operator.itemgetter(1), reverse=True))
    for word, count in word_count.items():
        if word not in word2idx:
            if count > min_word_count:
                word2idx[word] = len(word2idx)
            else:
                ignored_word_count += 1
    print('[Info] Trimmed vocabulary size = {},'.format(len(word2idx)),
          'each with minimum occurrence = {}'.format(min_word_count))
    print("[Info] Ignored word count = {}".format(ignored_word_count))
    return word2idx
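# A toy end-to-end sketch tying the two preprocessing helpers together. The sentences
# are hypothetical; Constants is assumed to define the usual PAD/UNK/BOS/EOS ids:
sents = [['the', 'cat', 'sat'], ['the', 'dog', 'sat', 'down']]
word2idx = build_vocab_idx(sents, min_word_count=0)      # words seen more than 0 times get ids
idx_seqs = convert_instance_to_idx_seq(sents, word2idx)  # rare/unknown words map to UNK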
        BreakFlag[:, st, va] = Breaks

# CALCULATE MONTHLY AVERAGES
Monthly_RO = np.zeros((len(iYearsRS), 12, RS_All.shape[1], RS_All.shape[2]))
Monthly_RO[:] = np.nan
for yy in range(len(iYearsRS)):
    for mo in range(12):
        MM = ((RSTimeDD.year == iYearsRS[yy]) & ((mo + 1) == RSTimeDD.month))
        Monthly_RO[yy, mo, :, :] = np.nanmean(RS_All[MM, :, :], axis=0)
        # mask out months whose valid-data fraction falls below MinCov
        NaNact = np.sum(~np.isnan(RS_All[MM, :, :]), axis=0) / np.sum(MM)
        Monthly_RO[yy, mo, (NaNact < MinCov)] = np.nan
        # condition cloud properties on rainy days (daily precipitation >= 1)
        PRcondit = RS_All[MM, :, :][:, :, (PRconditioning == 1)]
        PRcondit[(np.squeeze(PR_RS[MM, :]) < 1), :] = np.nan
        # NotNAN=~np.isnan(Monthly_RO[yy,mo,:,(PRconditioning==1)])
        Monthly_RO[yy, mo, :, (PRconditioning == 1)] = np.transpose(
            np.nanmean(PRcondit[:, :, :], axis=0))

# OK=(np.nanmax(BreakFlag[:,:,20], axis=0) < 0.5)
# plt.plot(np.nanmean(Monthly_RO[:,:,OK,20], axis=(1,2))); plt.show()

# SAVE THE DATA FOR FURTHER PROCESSING
np.savez(SaveFile,
         RSTimeMM=RSTimeMM,
         RS_Lon=RS_Lon,
         RS_Lat=RS_Lat,
def XWT(training_predictors,   # predictor variables used to train the model
        testing_predictors,    # predictor variables used to evaluate the model
        training_predictant,   # predictand variable used to train the model
        testing_predictant,    # predictand variable used to evaluate the model
        training_time,         # daily time vector for the training dataset
        testing_time,          # daily time vector for the testing dataset
        extreme_nr,            # number of extreme events considered
        smoothing_radius,      # smoothing radius applied to predictor fields
        ClusterMeth='HandK'):  # current options are ['HandK', 'hdbscan']
    # OPTIONAL INPUTS
    MinDistDD = 7         # min. distance between two extremes in days
    RelAnnom = 1          # 1: the clustering is performed on relative anomalies
    RemoveAnnualCycl = 1  # 1: the annual cycle is removed before the clustering
    NormalizeData = 1     # 1: each record is normalized according to its spatial
                          # characteristics; this automatically removes the annual cycle
    sPlotDir = None
    YYYY_stamp = None

    from Functions_Extreme_WTs import ExtremeDays
    rgiExtrTrain = ExtremeDays(training_predictant, extreme_nr, MinDistDD)
    ExtrTrainDays = training_time[rgiExtrTrain]
    rgiExtrEval = np.argsort(testing_predictant)[-extreme_nr:]
    # rgiExtrEval = ExtremeDays(testing_predictant, extreme_nr, MinDistDD)
    ExtrEvalDays = testing_time[rgiExtrEval]

    from Functions_Extreme_WTs import PreprocessWTdata
    training_predictors = PreprocessWTdata(training_predictors,                # WT data [time,lat,lon,var]
                                           RelAnnom=RelAnnom,                  # relative anomalies [1-yes; 0-no]
                                           SmoothSigma=smoothing_radius,       # smoothing stddev (Gaussian smoothing)
                                           RemoveAnnualCycl=RemoveAnnualCycl,  # remove annual cycle [1-yes; 0-no]
                                           NormalizeData=NormalizeData)        # normalize data [1-yes; 0-no]
    from Functions_Extreme_WTs import GetExtremeDays
    rgrWTdata = GetExtremeDays(training_predictors, training_time, ExtrTrainDays)

    # #### Run hierarchical clustering
    from Functions_Extreme_WTs import ClusterAnalysis
    rgrClustersFin = ClusterAnalysis(rgrWTdata, sPlotDir, extreme_nr, YYYY_stamp,
                                     Plot=0, ClusterMeth=ClusterMeth)

    # #### Prepare evaluation data
    DailyVarsEvalNorm = PreprocessWTdata(testing_predictors,
                                         RelAnnom=RelAnnom,
                                         SmoothSigma=smoothing_radius,
                                         RemoveAnnualCycl=RemoveAnnualCycl,
                                         NormalizeData=NormalizeData)

    # ###### Euclidean distances between evaluation days and cluster centroids
    from Functions_Extreme_WTs import EucledianDistance
    EucledianDist, Correlation = EucledianDistance(DailyVarsEvalNorm, rgrClustersFin)
    MinDistance = np.min(EucledianDist, axis=1)
    ClosestWT = np.argmin(EucledianDist, axis=1)
    MaxCorr = np.max(Correlation, axis=1)
    from Functions_Extreme_WTs import Scatter_ED_PR
    # Scatter_ED_PR(MinDistance, ClosestWT, Peval, rgrNrOfExtremes, PlotLoc=sPlotDir,
    #               PlotName='Scatter_'+sRegion+'_NrExt-'+str(rgrNrOfExtremes)+'_Smooth-'+str(SpatialSmoothing)+'_AnnCy-'+Annual_Cycle+'_'+VarsJoint+'_'+sMonths+'_'+Samples[ss]+'.pdf')

    # Calculate the skill scores
    from Functions_Extreme_WTs import MRR, MRD, perkins_skill
    # Perkins skill score
    grPSS = perkins_skill(MinDistance, MinDistance[rgiExtrEval], 0.5)
    # Mean relative difference
    grMRD = MRD(MinDistance, testing_predictant, rgiExtrEval)
    # Mean rank ratio
    grMRR = MRR(MinDistance, rgiExtrEval)
    # % of days excluded
    grExluded = (1 - np.sum(MinDistance < np.nanpercentile(MinDistance[rgiExtrEval], 75))
                 / float(len(MinDistance))) * 100.

    # calculate the AUC
    from sklearn.metrics import roc_auc_score
    testy = (testing_predictant >= np.sort(testing_predictant)[-extreme_nr])
    probs = (MinDistance - np.min(MinDistance))
    probs = np.abs((probs / probs.max()) - 1)
    try:
        auc = roc_auc_score(testy, probs)
    except ValueError:  # e.g. only one class present in testy
        auc = np.nan
    # calculate the average precision-recall score
    from sklearn.metrics import average_precision_score
    try:
        average_precision = average_precision_score(testy, probs)
    except ValueError:
        average_precision = np.nan

    # print("--- Summary of performance ---")
    # print(" PSS:      " + str(np.round(grPSS, 2)))
    # print(" MRD:      " + str(np.round(grMRD, 2)))
    # print(" MRR:      " + str(np.round(grMRR, 2)))
    # print(" Excluded: " + str(np.round(grExluded, 2)))
    # print(" AUC:      " + str(np.round(auc, 2)))
    # print(" APR:      " + str(np.round(average_precision, 2)))
    # print("------------------------------")

    XWT_output = {'grClustersFin': rgrClustersFin,
                  'grEucledianDist': MinDistance,
                  'EucledianDistAllWTs': EucledianDist,
                  'grCorrelatio': MaxCorr,
                  'grCorrelatioAllWTs': Correlation,
                  'grPSS': grPSS,
                  'grMRD': grMRD,
                  'grMRR': grMRR,
                  'APR': average_precision,
                  'AUC': auc,
                  'PEX': grExluded,
                  'grExluded': grExluded}
    return XWT_output
def GaugeDepth(config):
    '''Assess the depth of each sample at the given regions.'''
    startTime = time.perf_counter()  # wall-clock timer (time.clock was removed in Python 3.8)
    regionDict = defaultdict(set)
    for item in config.regions:
        if str(str(item).split('.')[-1]).lower() == 'bed':
            # this item is a bed file
            regionDict = parseRegionBed(item, regionDict)
        elif str(str(item).split(':')[0]).startswith('chr'):
            # this is a region string
            reg_chr = str(item.split(':')[0])
            try:
                reg_str = str(str(item.split(':')[1]).split('-')[0])
                reg_end = str(str(item.split(':')[1]).split('-')[1])
                name = str(reg_chr) + ":" + str(reg_str) + "-" + str(reg_end)
            except IndexError:
                # whole-chromosome regions [ex: chr2] span positions 1 to 1e9
                reg_str = 1
                reg_end = 1E9
                name = str(reg_chr)
            regionDict[reg_chr].add((reg_str, reg_end, name))
    if not regionDict:
        abortWithMessage("Regions not set!")
    covD = {'chr': [], 'start': [], 'stop': [], 'name': [], 'sample': [], 'depth': []}
    print("\n=== Reading BAM Files ===")
    for sid, fns in config.bams.items():  # loop over all samples
        for fn in fns:                    # loop over all bam files for this sample
            try:
                samfile = pysam.AlignmentFile(fn, "rb")
            except ValueError:
                throwWarning("Cannot open file {0}".format(fn))
                continue
            for contig, ROI in regionDict.items():
                for window in ROI:  # loop over all ROIs, checking this bam
                    bed_name = window[2]
                    # convert to 0-based, half-open coordinates
                    window = [int(window[0]) - 1, int(window[1])]
                    if config.p:
                        # point method: depth at the window midpoint only
                        position = round((window[1] - window[0]) / 2.0) + window[0]
                        avg_covg = samfile.count(contig, position - 1, position)
                    elif config.c:
                        # read count method; note that "avg_covg" is only a name here -
                        # it is the total count of reads, not an average!
                        avg_covg = samfile.count(contig, window[0], window[1])
                    else:
                        # complete average method
                        tmp_dict = {}
                        for position in range(window[0], window[1]):
                            tmp_dict[position] = 0
                        # loop over reads that hit the window and record coverage;
                        # stepper='all' yields mapped, primary, non-duplicate
                        # (identified by sam flag), QC-pass reads
                        for pileupcolumn in samfile.pileup(contig, window[0], window[1], stepper='all'):
                            if pileupcolumn.pos in tmp_dict:  # skip positions outside the window
                                tmp_dict[pileupcolumn.pos] = pileupcolumn.n
                        avg_covg = np.mean(list(tmp_dict.values()))
                        # samfile.count_coverage(contig, window[0], window[1], read_callback='all')
                        # could be faster than pileup, but behaved erratically (it did not
                        # produce the same number when run repeatedly), so it is not used here
                    covD['chr'].append(str(contig))
                    covD['start'].append(int(window[0]) + 1)
                    covD['stop'].append(int(window[1]))
                    covD['name'].append(str(bed_name))
                    covD['sample'].append(str(sid))
                    covD['depth'].append(float(avg_covg))
            samfile.close()
            totalTime = time.perf_counter() - startTime
            print("{0:02d}:{1:02d}\t{2}".format(int(totalTime / 60), int(totalTime % 60), fn))
    covDF = pd.DataFrame.from_dict(covD)[['chr', 'start', 'stop', 'name', 'sample', 'depth']]
    covDF = covDF.groupby(['chr', 'start', 'stop', 'name', 'sample'])['depth'].apply(sum).reset_index()
    totalTime = time.perf_counter() - startTime
    print("\n{0:02d}:{1:02d}\t{2}".format(int(totalTime / 60), int(totalTime % 60), "Done"))
    return covDF
def bspline(scene, rdisp, disp):
    # destretch scene using B-splines
    # Foley & Van Dam: pp 521-536
    # returns coordinates for scene(ny,nx) destretch
    ds = rdisp[0, 0, 1] - rdisp[0, 0, 0]
    dt = rdisp[1, 1, 0] - rdisp[1, 0, 0]
    # perform a B-spline 2D interpolation of the control point offsets;
    # first extend the control tie points to cover the full scene
    rdispn, dispn = extend(rdisp, disp)
    Rx = rdispn[0, :, :]
    Ry = rdispn[1, :, :]
    Px = dispn[0, :, :]
    Py = dispn[1, :, :]
    # uniform cubic B-spline basis matrix (stored transposed; see note below)
    Ms = np.array([-1, 3, -3, 1,
                   3, -6, 0, 4,
                   -3, 3, 3, 1,
                   1, 0, 0, 0]) / 6.
    Ms = np.mat(Ms.reshape(4, 4))
    MsT = Ms.T
    ans = np.zeros((2, scene.shape[0], scene.shape[1]))
    for v in np.arange(disp.shape[1] + 3):
        t0 = Ry[v + 1, 1]
        tn = Ry[v + 2, 1]
        if (tn <= 0) or (t0 > scene.shape[0] - 1):
            continue
        t0 = np.max([t0, 0])
        tn = np.min([tn, scene.shape[0] - 1])
        ta = np.arange(tn - t0) / dt + (t0 - Ry[v + 1, 1]) / dt
        for u in np.arange(disp.shape[2] + 3):
            s0 = Rx[v + 1, u + 1]
            sn = Rx[v + 1, u + 2]
            if (sn <= 0) or (s0 >= disp.shape[2] - 1):
                continue
            s0 = np.max([s0, 0])
            sn = np.min([sn, disp.shape[2] - 1])
            sa = np.arange(sn - s0) / ds + (s0 - Rx[v + 1, u + 1]) / ds
            compx = Ms * np.mat(Px[v:v + 4, u:u + 4]) * MsT
            compy = Ms * np.mat(Py[v:v + 4, u:u + 4]) * MsT
            ans[t0:tn, s0:sn] = patch(compx, compy, sa, ta)
    '''
    # alternate implementation with scipy.interpolate.RectBivariateSpline:
    # first extend the control points to the edge of the field
    xr = np.append(np.append(0, rdisp[0, 0, :]), scene.shape[1] - 1)
    yr = np.append(np.append(0, rdisp[1, :, 0]), scene.shape[0] - 1)
    xxr, yyr = np.meshgrid(yr, xr)
    zxd = np.zeros((yr.shape[0], xr.shape[0]))
    zxd[1:-1, 1:-1] = disp[0, :, :]
    zxd[:, 0] = 0.
    zxd[:, -1] = scene.shape[1] - 1
    zxd[0, :] = zxd[1, :]
    zxd[-1, :] = zxd[-2, :]
    zyd = np.zeros((yr.shape[0], xr.shape[0]))
    zyd[1:-1, 1:-1] = disp[1, :, :]
    zyd[:, 0] = zyd[:, 1]
    zyd[:, -1] = zyd[:, -2]
    zyd[0, :] = 0.
    zyd[-1, :] = scene.shape[0] - 1
    zxd_spline = RectBivariateSpline(yr, xr, zxd, kx=3, ky=3)
    zyd_spline = RectBivariateSpline(yr, xr, zyd, kx=3, ky=3)
    xn = np.arange(scene.shape[1])
    yn = np.arange(scene.shape[0])
    xxn, yyn = np.meshgrid(yn, xn)
    zxn_int = zxd_spline.ev(xxn.flatten(), yyn.flatten()).reshape(scene.shape).T
    zyn_int = zyd_spline.ev(xxn.flatten(), yyn.flatten()).reshape(scene.shape).T
    # correct the edges
    #plt.clf(); plt.imshow(zxn_int - xxn, cmap=plt.cm.gray, vmin=-11, vmax=11); plt.pause(0.01)
    #plt.clf(); plt.imshow(zyn_int - yyn, cmap=plt.cm.gray, vmin=-11, vmax=11); plt.pause(0.01)
    ans = np.zeros((2, scene.shape[1], scene.shape[0]))
    ans[0, :, :] = zxn_int
    ans[1, :, :] = zyn_int
    '''
    return ans
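# For reference, the uniform cubic B-spline segment that Ms encodes (Foley & Van Dam);
# Ms above stores the transpose of this standard blending matrix so that it can
# multiply the 4x4 control-point patch from both sides:
#
#   Q(t) = (1/6) [t^3 t^2 t 1] | -1  3 -3  1 | | P(i-1) |
#                              |  3 -6  3  0 | | P(i)   |      0 <= t < 1
#                              | -3  0  3  0 | | P(i+1) |
#                              |  1  4  1  0 | | P(i+2) |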
def calculategeoh(z, lnsp, ts, qs, levels):
    '''Compute geopotential and height on model levels (3-D fields: [level, lat, lon]).'''
    heighttoreturn = np.full([ts.shape[0], ts.shape[1], ts.shape[2]], -999, np.double)
    geotoreturn = np.copy(heighttoreturn)
    Rd = 287.06
    z_h = 0
    # surface pressure
    sp = np.exp(lnsp)
    # A and B parameters to calculate pressures for model levels, extracted from an
    # ECMWF GRIB file and then hardcoded here; the A and B coefficients are simply
    # appended into one list.
    pv = [0,2.00004,3.980832,7.387186,12.908319,21.413612,33.952858,51.746601,76.167656,108.715561,150.986023,204.637451,271.356506,352.824493,450.685791,566.519226,701.813354,857.945801,1036.166504,1237.585449,1463.16394,1713.709595,1989.87439,2292.155518,2620.898438,2976.302246,3358.425781,3767.196045,4202.416504,4663.776367,5150.859863,5663.15625,6199.839355,6759.727051,7341.469727,7942.92627,8564.624023,9208.305664,9873.560547,10558.88184,11262.48438,11982.66211,12713.89746,13453.22559,14192.00977,14922.68555,15638.05371,16329.56055,16990.62305,17613.28125,18191.0293,18716.96875,19184.54492,19587.51367,19919.79688,20175.39453,20348.91602,20434.1582,20426.21875,20319.01172,20107.03125,19785.35742,19348.77539,18798.82227,18141.29688,17385.5957,16544.58594,15633.56641,14665.64551,13653.21973,12608.38379,11543.16699,10471.31055,9405.222656,8356.25293,7335.164551,6353.920898,5422.802734,4550.21582,3743.464355,3010.146973,2356.202637,1784.854614,1297.656128,895.193542,576.314148,336.772369,162.043427,54.208336,6.575628,0.00316,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000014,0.000055,0.000131,0.000279,0.000548,0.001,0.001701,0.002765,0.004267,0.006322,0.009035,0.012508,0.01686,0.022189,0.02861,0.036227,0.045146,0.055474,0.067316,0.080777,0.095964,0.112979,0.131935,0.152934,0.176091,0.20152,0.229315,0.259554,0.291993,0.326329,0.362203,0.399205,0.436906,0.475016,0.51328,0.551458,0.589317,0.626559,0.662934,0.698224,0.732224,0.764679,0.795385,0.824185,0.85095,0.875518,0.897767,0.917651,0.935157,0.950274,0.963007,0.973466,0.982238,0.989153,0.994204,0.99763,1]
    levelSize = len(levels)  # number of model levels
    A = pv[0:levelSize + 1]
    B = pv[levelSize + 1:]
    Ph_levplusone = A[levelSize] + (B[levelSize] * sp)
    # get a list of level numbers in reverse order
    reversedlevels = np.full(levels.shape[0], -999, np.int32)
    for iLev in list(reversed(range(levels.shape[0]))):
        reversedlevels[levels.shape[0] - 1 - iLev] = levels[iLev]
    # integrate up into the atmosphere from the lowest level
    for lev in reversedlevels:
        # lev is the level number; find the corresponding index into ts and qs
        ilevel = np.where(levels == lev)[0][0]
        t_level = np.squeeze(ts[ilevel, :, :])
        q_level = np.squeeze(qs[ilevel, :, :])
        # compute moist (virtual) temperature
        t_level = t_level * (1. + 0.609133 * q_level)
        # compute the pressures (on half-levels)
        Ph_lev = A[lev - 1] + (B[lev - 1] * sp)
        if lev == 1:
            dlogP = np.log(Ph_levplusone / 0.1)
            alpha = np.log(2)
        else:
            dlogP = np.log(Ph_levplusone / Ph_lev)
            dP = Ph_levplusone - Ph_lev
            alpha = 1. - ((Ph_lev / dP) * dlogP)
        TRd = t_level * Rd
        # z_f is the geopotential of this full level:
        # integrate from the previous (lower) half-level z_h up to the full level
        z_f = z_h + (TRd * alpha)
        # convert geopotential to height
        heighttoreturn[ilevel, :, :] = z_f / 9.80665
        # geopotential on this level (add in the surface geopotential)
        geotoreturn[ilevel, :, :] = z_f + z
        # z_h is the geopotential of 'half-levels': integrate z_h to the next half-level
        z_h = z_h + (TRd * dlogP)
        Ph_levplusone = Ph_lev
    return geotoreturn, heighttoreturn
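# For reference, the integration above follows the standard ECMWF hybrid-coordinate
# formulas (stated here as an aid; the notation is ours, with p+ / p- the half-level
# pressures below / above full level k):
#
#   p_half  = A + B * sp                             half-level pressure from the pv table
#   Tv      = T * (1 + 0.609133 * q)                 moist (virtual) temperature
#   Phi(p-) = Phi(p+) + Rd * Tv * ln(p+/p-)          half-level update (the dlogP term)
#   Phi_k   = Phi(p+) + alpha * Rd * Tv              full-level value
#   alpha   = 1 - (p- / (p+ - p-)) * ln(p+/p-)       (alpha = ln 2 at the model top)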
def calculategeoh(z, lnsp, ts, qs, levels):
    '''Compute geopotential and height on model levels (4-D fields: [time, level, lat, lon]).'''
    heighttoreturn = np.full([ts.shape[0], ts.shape[1], ts.shape[2], ts.shape[3]], -999, np.double)
    geotoreturn = np.copy(heighttoreturn)
    Rd = 287.06
    z_h = 0
    # surface pressure
    sp = np.exp(lnsp)
    # A and B parameters to calculate pressures for model levels,
    # extracted from an ECMWF ERA-Interim GRIB file and then hardcoded here
    pv = [
        0.0000000000e+000, 2.0000000000e+001, 3.8425338745e+001, 6.3647796631e+001, 9.5636962891e+001,
        1.3448330688e+002, 1.8058435059e+002, 2.3477905273e+002, 2.9849584961e+002, 3.7397192383e+002,
        4.6461816406e+002, 5.7565112305e+002, 7.1321801758e+002, 8.8366040039e+002, 1.0948347168e+003,
        1.3564746094e+003, 1.6806403809e+003, 2.0822739258e+003, 2.5798886719e+003, 3.1964216309e+003,
        3.9602915039e+003, 4.9067070313e+003, 6.0180195313e+003, 7.3066328125e+003, 8.7650546875e+003,
        1.0376125000e+004, 1.2077445313e+004, 1.3775324219e+004, 1.5379804688e+004, 1.6819472656e+004,
        1.8045183594e+004, 1.9027695313e+004, 1.9755109375e+004, 2.0222203125e+004, 2.0429863281e+004,
        2.0384480469e+004, 2.0097402344e+004, 1.9584328125e+004, 1.8864750000e+004, 1.7961359375e+004,
        1.6899468750e+004, 1.5706449219e+004, 1.4411125000e+004, 1.3043218750e+004, 1.1632757813e+004,
        1.0209500000e+004, 8.8023554688e+003, 7.4388046875e+003, 6.1443164063e+003, 4.9417773438e+003,
        3.8509133301e+003, 2.8876965332e+003, 2.0637797852e+003, 1.3859125977e+003, 8.5536181641e+002,
        4.6733349609e+002, 2.1039389038e+002, 6.5889236450e+001, 7.3677425385e+000, 0.0000000000e+000,
        0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
        0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
        0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
        0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
        0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
        0.0000000000e+000, 7.5823496445e-005, 4.6139489859e-004, 1.8151560798e-003, 5.0811171532e-003,
        1.1142909527e-002, 2.0677875727e-002, 3.4121163189e-002, 5.1690407097e-002, 7.3533833027e-002,
        9.9674701691e-002, 1.3002252579e-001, 1.6438430548e-001, 2.0247590542e-001, 2.4393314123e-001,
        2.8832298517e-001, 3.3515489101e-001, 3.8389211893e-001, 4.3396294117e-001, 4.8477154970e-001,
        5.3570991755e-001, 5.8616840839e-001, 6.3554745913e-001, 6.8326860666e-001, 7.2878581285e-001,
        7.7159661055e-001, 8.1125342846e-001, 8.4737491608e-001, 8.7965691090e-001, 9.0788388252e-001,
        9.3194031715e-001, 9.5182150602e-001, 9.6764522791e-001, 9.7966271639e-001, 9.8827010393e-001,
        9.9401944876e-001, 9.9763011932e-001, 1.0000000000e+000]
    levelSize = len(levels)  # 60
    A = pv[0:levelSize + 1]
    B = pv[levelSize + 1:]
    Ph_levplusone = A[levelSize] + (B[levelSize] * sp)
    # get a list of level numbers in reverse order
    reversedlevels = np.full(levels.shape[0], -999, np.int32)
    for iLev in list(reversed(range(levels.shape[0]))):
        reversedlevels[levels.shape[0] - 1 - iLev] = levels[iLev]
    # integrate up into the atmosphere from the lowest level
    for lev in reversedlevels:
        # lev is the level number 1-60; find the corresponding index into ts and qs
        ilevel = np.where(levels == lev)[0][0]
        t_level = np.squeeze(ts[:, ilevel, :, :])
        q_level = np.squeeze(qs[:, ilevel, :, :])
        # compute moist (virtual) temperature
        t_level = t_level * (1. + 0.609133 * q_level)
        # compute the pressures (on half-levels)
        Ph_lev = A[lev - 1] + (B[lev - 1] * sp)
        if lev == 1:
            dlogP = np.log(Ph_levplusone / 0.1)
            alpha = np.log(2)
        else:
            dlogP = np.log(Ph_levplusone / Ph_lev)
            dP = Ph_levplusone - Ph_lev
            alpha = 1. - ((Ph_lev / dP) * dlogP)
        TRd = t_level * Rd
        # z_f is the geopotential of this full level:
        # integrate from the previous (lower) half-level z_h up to the full level
        z_f = z_h + (TRd * alpha)
        # convert geopotential to height
        heighttoreturn[:, ilevel] = z_f / 9.80665
        # geopotential on this level (add in the surface geopotential)
        geotoreturn[:, ilevel] = z_f + z[:, 0, :, :]
        # z_h is the geopotential of 'half-levels': integrate z_h to the next half-level
        z_h = z_h + (TRd * dlogP)
        Ph_levplusone = Ph_lev
    return geotoreturn, heighttoreturn
def ReadCESMday(DaySel, Exp, iWest, iEast, iSouth, iNort, rgrTimeCESMFull, VARS=None, AddCells=0):
    """Read in a single day within a region from one CESM large-ensemble simulation.

    All variables necessary for a synoptic map plot are read in.
    """
    if VARS is None:
        rgsWTvars = ['Z500', 'U850', 'V850', 'TMQ']
        VarsFullName = ['Z500', 'U850', 'V850', 'PW']
        rgsWTfolders = ['/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/Z500/',
                        '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/U850/',
                        '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/V850/',
                        '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/TMQ/']
    else:
        rgsWTvars = VARS[0]
        VarsFullName = VARS[1]
        rgsWTfolders = VARS[2]
    s20Cname = 'b.e11.B20TRC5CNBDRD.f09_g16.'
    s21Cname = 'b.e11.BRCP85C5CNBDRD.f09_g16.'
    # start reading in the CESM data
    iRegionPlus = AddCells  # grid cells added around the shape rectangle
    ncid = Dataset('/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/PSL/b.e11.B20TRC5CNBDRD.f09_g16.001.cam.h1.PSL.18500101-20051231.nc', mode='r')
    rgrLonWT1D = np.squeeze(ncid.variables['lon'][:])
    rgrLatWT1D = np.squeeze(ncid.variables['lat'][:])
    ncid.close()
    rgrLonS = rgrLonWT1D[iWest - iRegionPlus:iEast + iRegionPlus]
    rgrLatS = rgrLatWT1D[iSouth - iRegionPlus:iNort + iRegionPlus]
    # read the variables
    DataAll = np.zeros((len(rgrLatS), len(rgrLonS), len(rgsWTvars)))
    DataAll[:] = np.nan
    for va in range(len(rgsWTvars)):
        if DaySel.year < 2006:
            if Exp == '001':
                rgrTimeCESM = pd.date_range(datetime.date(1850, 1, 1), end=datetime.date(2005, 12, 31), freq='d')
            else:
                rgrTimeCESM = pd.date_range(datetime.date(1920, 1, 1), end=datetime.date(2005, 12, 31), freq='d')
            Cfiles = glob.glob(rgsWTfolders[va] + '/' + s20Cname + Exp + '*' + rgsWTvars[va] + '*')[0]
        if DaySel.year >= 2006:
            if int(Exp) >= 34:
                rgrTimeCESM = pd.date_range(datetime.date(2006, 1, 1), end=datetime.date(2100, 12, 31), freq='d')
                Cfiles = glob.glob(rgsWTfolders[va] + '/' + s21Cname + Exp + '*' + rgsWTvars[va] + '*')[0]
            elif DaySel.year <= 2080:
                rgrTimeCESM = pd.date_range(datetime.date(2006, 1, 1), end=datetime.date(2080, 12, 31), freq='d')
                Cfiles = np.sort(glob.glob(rgsWTfolders[va] + '/' + s21Cname + Exp + '*' + rgsWTvars[va] + '*'))[0]
            elif DaySel.year >= 2081:
                rgrTimeCESM = pd.date_range(datetime.date(2081, 1, 1), end=datetime.date(2100, 12, 31), freq='d')
                Cfiles = np.sort(glob.glob(rgsWTfolders[va] + '/' + s21Cname + Exp + '*' + rgsWTvars[va] + '*'))[1]
        # drop leap days to match the CESM no-leap calendar
        rgiNonLeap = np.where((rgrTimeCESM.month != 2) | (rgrTimeCESM.day != 29))[0]
        rgrTimeCESM = rgrTimeCESM[rgiNonLeap]
        iDDselect = np.where(rgrTimeCESM == DaySel)[0][0]
        ncid = Dataset(Cfiles, mode='r')
        DataAll[:, :, va] = np.squeeze(ncid.variables[rgsWTvars[va]][iDDselect,
                                       iSouth - iRegionPlus:iNort + iRegionPlus,
                                       iWest - iRegionPlus:iEast + iRegionPlus])
        ncid.close()
    return DataAll, rgrLonS, rgrLatS
import noddy2
from pdb import set_trace as stop

if __name__ == '__main__':
    nn = noddy2.Noddy()
    aa = nn.session_id
    stop()