Example no. 1
0
    def forward(self,
                dec_input,
                enc_output,
                slf_attn_mask=None,
                dec_enc_attn_mask=None,
                label_vec=None):
        stop()  # debugger breakpoint: stop is pdb.set_trace (see the import in Example no. 17)

        enc_dec_input = torch.cat((enc_output, dec_input), 1)
        support = torch.bmm(enc_dec_input,
                            self.weight.repeat(enc_dec_input.size(0), 1, 1))

        enc_dec_mask = torch.cat((dec_enc_attn_mask,
                                  torch.zeros(dec_input.size(1),
                                              dec_input.size(1)).cuda()))
        output = torch.bmm(slf_attn_mask.repeat(support.size(0), 1, 1),
                           support)

        if self.bias1 is not None:
            output = output + self.bias1

        if slf_attn_mask is not None:
            slf_attn_mask = torch.zeros(dec_input.size(1),
                                        dec_input.size(1)).cuda()
        slf_attn_mask = utils.swap_0_1(slf_attn_mask, 1, 0)

        support = torch.bmm(dec_input,
                            self.weight.repeat(dec_input.size(0), 1, 1))
        output = torch.bmm(slf_attn_mask.repeat(support.size(0), 1, 1),
                           support)

        if self.bias2 is not None:
            return output + self.bias2, None, None
        else:
            return output, None, None
Example no. 2
0
def convert_instance_to_idx_seq(word_insts, word2idx):
    '''Map word tokens to vocabulary index sequences'''
    try:
        word_insts = ['</s>' if x is None else x for x in word_insts]
        return [[word2idx[w] if w in word2idx else Constants.UNK for w in s] for s in word_insts]
    except:

        print('error in preprocess.py')
        stop()
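A minimal usage sketch of the mapping above, run standalone with a toy vocabulary (the word2idx contents and the UNK index of 1 are assumptions for illustration; the real code takes them from build_vocab_idx and Constants):

UNK = 1  # assumed stand-in for Constants.UNK
word2idx = {'</s>': 0, 'the': 2, 'cat': 3}
word_insts = [['the', 'cat', 'sat']]
# unknown words ('sat') fall back to the UNK index
print([[word2idx[w] if w in word2idx else UNK for w in s] for s in word_insts])
# -> [[2, 3, 1]]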
Example no. 3
0
def doreg(scene, r, d):
    ## scene(ny,nx), image to be destretched
    ## r, d[2,:,:], reference and actual displacements of control points 
    
    ## B-spline method 
    xy = bspline(scene, r, d)
    ans = bilin(scene, xy)       # bilinear interpolation
    stop()
    
    return ans
def downloadProjectBam(project, myAPI, dryRun, samples=[], force=False, qp=QueryParameters.QueryParameters({'Limit':1024})):
    totalSize = 0
    results = project.getAppResults(myAPI, qp)
    for result in results:
        bams = [ x for x in result.getFiles(myAPI, qp) if "bam" in str(x) ]
        if samples:
            if type(samples[0]) == str:
                samples = stringsToBSObj(project.getSamples(myAPI, qp), samples)
            # user picked particular samples
            # subset the list of bams accordingly
            #bams = [x for x in bams if ]
            #WIP
            print("\n\nuser picked particular samples, but this isn't coded in yet\n")
            stop()
        savePath = str(project).replace(" ","_") + "/" + pathFromFile(bams[0], myAPI)
        tmpPath = str(project).replace(" ","_") + "/" + pathFromFile(bams[0], myAPI) + "/partial/"
        if not os.path.exists(savePath):
            os.makedirs(savePath)
        if not os.path.exists(tmpPath):
            os.makedirs(tmpPath)
        for fn in bams:
            thisSize = fn.__dict__['Size']
            # totalSize += thisSize
            if dryRun:
                totalSize += thisSize
                print(humanFormat(thisSize) + '\t' + fn.Name)
                continue
            # savePath = str(project).replace(" ","_") + "/" + pathFromFile(fn, myAPI)
            # tmpPath = str(project).replace(" ","_") + "/" + pathFromFile(fn, myAPI) + "partial/"
            # if not os.path.exists(savePath):
            #     os.makedirs(savePath)
            # if not os.path.exists(tmpPath):
            #     os.makedirs(tmpPath)
            pathToFn = os.path.join(savePath, fn.Name)
            if not force and fileExists(pathToFn, fn):
                print("already have " + savePath + "/" + fn.Name + ". Skipping...")
                continue
            else:
                counter = 1
                while os.path.exists(os.path.join(savePath, fn.Name)):
                    # if the path exists, append a numeric suffix to the name to avoid overwriting
                    fn.Name = os.path.basename(fn.Path) + "." + str(counter)
                    counter += 1
                print(os.path.join(savePath, fn.Name))
                totalSize += thisSize
                fn.downloadFile(myAPI, tmpPath)
                shutil.move(os.path.join(tmpPath, os.path.split(fn.Path)[1] ) , os.path.join(savePath,fn.Name) )
        if os.path.exists(tmpPath) and not os.listdir(tmpPath):
            os.rmdir(tmpPath)    
    if not dryRun:
        downloadProjectMetadata(project, myAPI, samples=samples, outdir=savePath)
    print( humanFormat(totalSize) + '\t' + str(project) )
    return totalSize
Example no. 5
0
def perkins_skill(data1, data2, Binsize):
    Min=np.nanmin([np.nanmin(data1),np.nanmin(data2)])
    Max=np.nanmax([np.nanmax(data1),np.nanmax(data2)])

    hist, bin_edges = np.histogram(data1[~np.isnan(data1)],bins=np.arange(Min,Max,Binsize),density=True)
    pdf1 = hist*np.diff(bin_edges)
    try:
        histEx, bin_edgesEx = np.histogram(data2[~np.isnan(data2)],bins=np.arange(Min,Max,Binsize),density=True)  # drop NaNs, mirroring data1
    except:
        stop()
    pdf2 = histEx*np.diff(bin_edgesEx)
    mins = np.minimum(pdf1,pdf2)
    ss = np.nansum(mins)
    return ss
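A quick sanity check of perkins_skill above on synthetic data (standalone apart from the function itself; the 0.5 bin width is an arbitrary choice):

import numpy as np

rng = np.random.default_rng(0)
a = rng.normal(0.0, 1.0, 10000)
b = rng.normal(0.5, 1.0, 10000)
# Identical samples share the whole empirical PDF, so the score is ~1;
# the shifted sample overlaps less, so its score is noticeably lower.
print(perkins_skill(a, a, 0.5))
print(perkins_skill(a, b, 0.5))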
Example no. 6
0
    def forward(self, q, k, v, attn_mask=None, stop_sig=False):
        attn = torch.bmm(q, k.transpose(1, 2))
        attn = attn / self.temperature

        if attn_mask is not None:
            attn = attn.masked_fill(attn_mask, -np.inf)

        if stop_sig:
            print('**')
            stop()

        attn = self.attn_type(attn)
        #attn = self.dropout(attn)
        output = torch.bmm(attn, v)

        return output, attn
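For comparison, a self-contained sketch of the same scaled dot-product attention in plain PyTorch (softmax in place of self.attn_type, temperature defaulting to sqrt(d); this mirrors the method above but is not the project's class):

import torch
import torch.nn.functional as F

def scaled_dot_product_attention(q, k, v, attn_mask=None, temperature=None):
    # q, k, v: (batch, len, d); attn_mask: boolean, True marks positions to mask out
    temperature = temperature if temperature is not None else q.size(-1) ** 0.5
    attn = torch.bmm(q, k.transpose(1, 2)) / temperature
    if attn_mask is not None:
        attn = attn.masked_fill(attn_mask, float('-inf'))
    attn = F.softmax(attn, dim=-1)
    return torch.bmm(attn, v), attn

q = k = v = torch.randn(2, 4, 8)
out, attn = scaled_dot_product_attention(q, k, v)
print(out.shape, attn.shape)  # torch.Size([2, 4, 8]) torch.Size([2, 4, 4])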
Example no. 7
0
def bilin(scene, xy):
	
    ## might be able to use opencv remap function here for the interpolation 
    
    xr = np.arange(0,scene.shape[1])
    yr = np.arange(0,scene.shape[0])
    xxr, yyr = np.meshgrid(yr,xr)
	
    points = np.zeros((scene.size,2))
    points[:,1] = xy[1,:,:].ravel()
    points[:,0] = xy[0,:,:].ravel()
    values = scene.ravel()
     
    grid_z0 = griddata(points, values, (xxr, yyr), method = 'nearest')
   
    stop() 

    return grid_z0
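As the comment in bilin suggests, OpenCV's remap can do this resampling with true bilinear interpolation; a minimal sketch, assuming xy[0] holds the x (column) coordinates and xy[1] the y (row) coordinates to sample at (that ordering is an assumption about this code's convention):

import numpy as np
import cv2  # opencv-python

def bilin_cv2(scene, xy):
    # cv2.remap samples scene at (map_x[i, j], map_y[i, j]) for every output pixel (i, j).
    map_x = xy[0].astype(np.float32)
    map_y = xy[1].astype(np.float32)
    return cv2.remap(scene.astype(np.float32), map_x, map_y, cv2.INTER_LINEAR)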
Example no. 8
0
def build_vocab_idx(word_insts, min_word_count, use_bos_eos=True):
    ''' Trim vocab by number of occurrences '''

    try:
        full_vocab = set(w for sent in word_insts for w in sent)
    except:
        stop()
    print('[Info] Original Vocabulary size =', len(full_vocab))

    if use_bos_eos:
        word2idx = {
            Constants.BOS_WORD: Constants.BOS,
            Constants.EOS_WORD: Constants.EOS,
            Constants.PAD_WORD: Constants.PAD,
            Constants.UNK_WORD: Constants.UNK
        }
    else:
        word2idx = {}

    word_count = {w: 0 for w in full_vocab}

    for sent in word_insts:
        for word in sent:
            word_count[word] += 1

    ignored_word_count = 0

    word_count = sorted(word_count.items(),
                        key=operator.itemgetter(1),
                        reverse=True)
    word_count = collections.OrderedDict(word_count)
    for word, count in word_count.items():
        if word not in word2idx:
            if count > min_word_count:
                word2idx[word] = len(word2idx)
            else:
                ignored_word_count += 1

    print('[Info] Trimmed vocabulary size = {},'.format(len(word2idx)),
          'each with minimum occurrence = {}'.format(min_word_count))
    print("[Info] Ignored word count = {}".format(ignored_word_count))
    return word2idx
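A toy run of the trimming logic above, with a stand-in Constants class so the snippet is self-contained (the special-token indices below are assumptions, not the project's real values):

import collections
import operator

class Constants:  # assumed stand-in for the project's Constants module
    PAD, UNK, BOS, EOS = 0, 1, 2, 3
    PAD_WORD, UNK_WORD, BOS_WORD, EOS_WORD = '<pad>', '<unk>', '<s>', '</s>'

sents = [['the', 'cat', 'sat'], ['the', 'dog', 'sat'], ['the', 'cat']]
word2idx = build_vocab_idx(sents, min_word_count=1)
# 'the', 'cat' and 'sat' appear more than once and are kept; 'dog' is ignored.
print(word2idx)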
                    BreakFlag[:, st, va] = Breaks

    # CALCULATE MONTHLY AVERAGES
    Monthly_RO = np.zeros(
        (len(iYearsRS), 12, RS_All.shape[1], RS_All.shape[2]))
    Monthly_RO[:] = np.nan
    for yy in range(len(iYearsRS)):
        for mo in range(12):
            MM = ((RSTimeDD.year == iYearsRS[yy]) &
                  ((mo + 1) == RSTimeDD.month))
            Monthly_RO[yy, mo, :, :] = np.nanmean(RS_All[MM, :, :], axis=0)
            NaNact = np.sum(~np.isnan(RS_All[MM, :, :]), axis=0) / np.sum(MM)
            try:
                Monthly_RO[yy, mo, (NaNact < MinCov)] = np.nan
            except:
                stop()
            # condition cloud properties on rainy days
            PRcondit = RS_All[MM, :, :][:, :, (PRconditioning == 1)]
            PRcondit[(np.squeeze(PR_RS[MM, :]) < 1), :] = np.nan
            # NotNAN=~np.isnan(Monthly_RO[yy,mo,:,(PRconditioning==1)])
            Monthly_RO[yy, mo, :, (PRconditioning == 1)] = np.nan
            Monthly_RO[yy, mo, :, (PRconditioning == 1)] = np.transpose(
                np.nanmean(PRcondit[:, :, :], axis=0))

    # OK=(np.nanmax(BreakFlag[:,:,20], axis=0) < 0.5)
    # plt.plot(np.nanmean(Monthly_RO[:,:,OK,20], axis=(1,2))); plt.show()
    # SAVE THE DATA FOR FURTHER PROCESSING
    np.savez(SaveFile,
             RSTimeMM=RSTimeMM,
             RS_Lon=RS_Lon,
             RS_Lat=RS_Lat,
Example no. 10
0
def XWT(training_predictors, # predictor variables that are used to train the model
    testing_predictors,      # predictor variables that are used to evaluate the model
    training_predictant,     # predictand variable that is used to train the model
    testing_predictant,      # predictand variable that is used to evaluate the model
    training_time,           # daily time vector for the training dataset
    testing_time,            # daily time vector for the testing dataset
    extreme_nr,              # Nr. of extreme events considered
    smoothing_radius,        # smoothing radius applied to predictor fields
    ClusterMeth='HandK'):    # current options are ['HandK','hdbscan']

    #  OPTIONAL INPUTS
    MinDistDD=7 # min. distance between two extremes in days
    RelAnnom=1  # 1 means that the clustering is performed based on relative anomalies
    RemoveAnnualCycl=1 # 1 means that the annual cycle will be removed before the clustering
    NormalizeData=1 # 1 means that each record will be normalized according to its spatial characteristics. This automatically removes the annual cycle.
    sPlotDir=None
    YYYY_stamp=None

    from Functions_Extreme_WTs import ExtremeDays
    rgiExtrTrain=ExtremeDays(training_predictant,extreme_nr,MinDistDD)
    ExtrTrainDays=training_time[rgiExtrTrain]
    rgiExtrEval=np.argsort(testing_predictant)[-extreme_nr:]
    # rgiExtrEval=ExtremeDays(testing_predictant,extreme_nr,MinDistDD)
    ExtrEvalDays=testing_time[rgiExtrEval]

    from Functions_Extreme_WTs import PreprocessWTdata
    training_predictors=PreprocessWTdata(training_predictors,            # WT data [time,lat,lon,var]
                               RelAnnom=RelAnnom,                     # calculate relative anomalies [1-yes; 0-no]
                               SmoothSigma=smoothing_radius,   # Smoothing stddev (Gaussian smoothing)
                               RemoveAnnualCycl=RemoveAnnualCycl,             # remove annual cycle [1-yes; 0-no]
                               NormalizeData=NormalizeData)                # normalize data [1-yes; 0-no]

    from Functions_Extreme_WTs import GetExtremeDays
    rgrWTdata=GetExtremeDays(training_predictors,
                             training_time,
                             ExtrTrainDays)
    
    # ################################################
    # ####  Run hierarchical clustering
    from Functions_Extreme_WTs import ClusterAnalysis
    rgrClustersFin=ClusterAnalysis(rgrWTdata,
                                   sPlotDir,
                                   extreme_nr,
                                   YYYY_stamp,
                                   Plot=0,
                                   ClusterMeth=ClusterMeth)

    # ################################################
    # ####  Prepare evaluation data
    DailyVarsEvalNorm=PreprocessWTdata(testing_predictors,                  # WT data [time,lat,lon,var]
                                       RelAnnom=RelAnnom,                     # calculate relative anomalies [1-yes; 0-no]
                                       SmoothSigma=smoothing_radius,   # Smoothing stddev (Gaussian smoothing)
                                       RemoveAnnualCycl=RemoveAnnualCycl,             # remove annual cycle [1-yes; 0-no]
                                       NormalizeData=NormalizeData)                # normalize data [1-yes; 0-no]

    # ################################################
    # ######       EUCLIDEAN DISTANCES
    from Functions_Extreme_WTs import EucledianDistance
    EucledianDist, Correlation =EucledianDistance(DailyVarsEvalNorm,
                                                  rgrClustersFin)

    from Functions_Extreme_WTs import Scatter_ED_PR
    MinDistance=np.min(EucledianDist, axis=1)
    ClosestWT=np.argmin(EucledianDist, axis=1)
    MaxCorr=np.max(Correlation, axis=1)
    # Scatter_ED_PR(MinDistance,
    #               ClosestWT,
    #               Peval,
    #               rgrNrOfExtremes,
    #               PlotLoc=sPlotDir,
    #               PlotName='Scatter_'+sRegion+'_NrExt-'+str(rgrNrOfExtremes)+'_Smooth-'+str(SpatialSmoothing)+'_AnnCy-'+Annual_Cycle+'_'+VarsJoint+'_'+sMonths+'_'+Samples[ss]+'.pdf')

    # Calculate the skill scores
    from Functions_Extreme_WTs import MRR, MRD, perkins_skill
    # Perkins Skill Score
    try:
        grPSS=perkins_skill(MinDistance,MinDistance[rgiExtrEval], 0.5)
    except:
        stop()
    # Mean relative difference
    grMRD=MRD(MinDistance,testing_predictant,rgiExtrEval)
    # Mean Rank Ratio
    grMRR=MRR(MinDistance,rgiExtrEval)
    # % of days excluded
    grExluded=(1-np.sum(MinDistance < np.nanpercentile(MinDistance[rgiExtrEval],75))/float(len(MinDistance)))*100.

    # calculate the AUC
    from sklearn.metrics import roc_auc_score
    testy=(testing_predictant >= np.sort(testing_predictant)[-extreme_nr])
    probs=(MinDistance-np.min(MinDistance)); probs=np.abs((probs/probs.max())-1)
    try:
        auc = roc_auc_score(testy, probs)
    except:
        auc=np.nan

    # Calculate the Average precision-recall score
    from sklearn.metrics import average_precision_score
    from sklearn import svm, datasets
    try:
        average_precision = average_precision_score(testy, probs)
    except:
        average_precision = np.nan

    # print("--- Summary of performance ---")
    # print("    PSS: "+str(np.round(grPSS,2)))
    # print("    MRD: "+str(np.round(grMRD,2)))
    # print("    MRR: "+str(np.round(grMRR,2)))
    # print("    Excluded: "+str(np.round(grExluded,2)))
    # print("    AUC: "+str(np.round(auc,2)))
    # print("    APR: "+str(np.round(average_precision,2)))
    # print("------------------------------")

    XWT_output={'grClustersFin':rgrClustersFin, 
                'grEucledianDist':MinDistance, 
                'EucledianDistAllWTs':EucledianDist, 
                'grCorrelatio':MaxCorr,
                'grCorrelatioAllWTs':Correlation,
                'grPSS':grPSS,
                'grMRD':grMRD,
                'grMRR':grMRR,
                'APR':average_precision,
                'AUC':auc,
                'PEX':grExluded,
                'grExluded':grExluded}
    return XWT_output
Example no. 11
0
def GaugeDepth(config):
    '''
    assess the depth of each sample at the given regions
    '''

    startTime = time.clock()
    regionDict = defaultdict(set)
    for item in config.regions:
        if str(str(item).split('.')
               [-1]).lower() == 'bed':  # this item is a bed file
            regionDict = parseRegionBed(item, regionDict)
        elif str(
                str(item).split(':')[0]).startswith('chr'):  # this is a string
            reg_chr = str(item.split(':')[0])
            try:
                reg_str = str(str(item.split(':')[1]).split('-')[0])
                reg_end = str(str(item.split(':')[1]).split('-')[1])
                name = str(reg_chr) + ":" + str(reg_str) + "-" + str(reg_end)
            except IndexError:  # represent whole-chromosome regions [ex: chr2] as chrN:1-1E9 in the region dictionary
                reg_str = 1
                reg_end = 1E9
                name = str(reg_chr)
            regionDict[reg_chr].add((reg_str, reg_end, name))
    if not regionDict:
        abortWithMessage("Regions not set!")

    covD = {
        'chr': [],
        'start': [],
        'stop': [],
        'name': [],
        'sample': [],
        'depth': []
    }
    print("\n=== Reading BAM Files ===")
    for sid, fns in config.bams.items():
        # loop over all samples
        for fn in fns:
            # loop over all bam files for this sample
            try:
                samfile = pysam.AlignmentFile(fn, "rb")
            except ValueError:
                throwWarning("Cannot open file {0}".format(fn))
                continue
            for contig, ROI in regionDict.items():
                for window in ROI:
                    try:
                        bed_name = window[2]
                    except:
                        stop()
                    # make window 0
                    window = [int(window[0]) - 1, int(window[1])]

                    # loop over all ROIs, checking this bam
                    if config.p:
                        #point method
                        tmp_dict = {}
                        position = round(
                            (window[1] - window[0]) / 2.0) + window[0]
                        avg_covg = samfile.count(contig, position - 1,
                                                 position)

                        #for position in range(window[0],window[1]):
                        #    region = str(contig) + ':' + str(position) + '-' + str(position)
                        #    tmp_dict[position] = samfile.count(region=region)
                        #avg_covg = np.mean(tmp_dict.values())
                    elif config.c:
                        #read count method
                        avg_covg = samfile.count(contig, window[0], window[1])
                        #note that "avg_covg" is only a name here - it is the total count of reads, not an average!
                    else:
                        #complete average method
                        #'''
                        tmp_dict = {}
                        for position in range(window[0], window[1]):
                            tmp_dict[position] = 0
                        for pileupcolumn in samfile.pileup(contig,
                                                           window[0],
                                                           window[1],
                                                           stepper='all'):
                            #loop over reads that hit the window locations and record coverage
                            # 'stepper = all' yields mapped, primary, non-duplicate (identified by sam flag), QC pass reads
                            try:
                                tmp_dict[pileupcolumn.pos]
                                tmp_dict[pileupcolumn.pos] = pileupcolumn.n
                            except:
                                #skip this position if it's not in the region dict
                                continue
                        avg_covg = np.mean(list(tmp_dict.values()))  # list() so np.mean works on Python 3 dict views
                        #'''
                        '''
                        #this behaves erratically and does not produce the same number if run repeatedly
                        #   not sure how to use the function, but it could be faster than pileup
                        counter = 0
                        for ct_cov in samfile.count_coverage(contig, window[0], window[1], read_callback = 'all'):
                            for nt_arr in ct_cov:
                                counter += int(nt_arr)
                        stop()
                        avg_covg = counter/float(window[1] - window[0])
                        '''

                    covD['chr'].append(str(contig))
                    covD['start'].append(int(window[0]) + 1)
                    covD['stop'].append(int(window[1]))
                    covD['name'].append(str(bed_name))
                    covD['sample'].append(str(sid))
                    covD['depth'].append(float(avg_covg))
            samfile.close()
            totalTime = time.clock() - startTime
            print("{0:02d}:{1:02d}\t{2}".format(int(totalTime / 60),
                                                int(totalTime % 60), fn))
    covDF = pd.DataFrame.from_dict(covD)[[
        'chr', 'start', 'stop', 'name', 'sample', 'depth'
    ]]
    covDF = covDF.groupby(['chr', 'start', 'stop', 'name',
                           'sample'])['depth'].apply(sum).reset_index()

    totalTime = time.clock() - startTime
    print("\n{0:02d}:{1:02d}\t{2}".format(int(totalTime / 60),
                                          int(totalTime % 60), "Done"))
    return covDF
Example no. 12
0
def bspline(scene, rdisp, disp):
    # destretch scene using B-splines
    # Foley & Van Dam:  pp 521-536
    # returns coordinates for scene(ny,nx) destretch

    ds = rdisp[0,0,1] - rdisp[0,0,0]
    dt = rdisp[1,1,0] - rdisp[1,0,0]
    
    # perform a B-spline 2D interpolation of the control 
    # point offsets

    # first extend the control tie points to cover full scene
    rdispn, dispn = extend(rdisp, disp)
    Rx = rdispn[0,:,:]
    Ry = rdispn[1,:,:]
    Px = dispn[0,:,:]
    Py = dispn[1,:,:]

    # implement b-spline interpolation

    Ms = np.array([-1,3,-3,1, 3,-6,0,4, -3,3,3,1, 1,0,0,0])/6.
    Ms = Ms.reshape(4,4)
    Ms = np.mat(Ms)
    MsT = Ms.T


    ans = np.zeros((2,scene.shape[0],scene.shape[1]))
    for v in np.arange(disp.shape[1]+3):
        t0 = Ry[v+1,1]
        tn = Ry[v+2,1]
        if (tn <= 0) or (t0 > scene.shape[0]-1):
            continue
        t0 = np.max([t0,0])
        tn = np.min([tn,scene.shape[0]-1])
        ta = np.arange(tn-t0)/dt + (t0 - Ry[v+1,1])/dt
        for u in np.arange(disp.shape[2]+3):
            s0 = Rx[v+1,u+1]
            sn = Rx[v+1,u+2]
            if (sn <=0) or (s0 >= disp.shape[2]-1):
                continue
            s0 = np.max([s0,0])
            sn = np.min([sn,disp.shape[2]-1])
            sa = np.arange(sn-s0)/ds + (s0 -Rx[v+1,u+1])/ds
            compx = Ms * np.mat(Px[v:v+4,u:u+4]) * MsT
            compy = Ms * np.mat(Py[v:v+4,u:u+4]) * MsT
            ans[:, int(t0):int(tn), int(s0):int(sn)] = patch(compx, compy, sa, ta)  # index all of axis 0; cast slice bounds to int
            
            
            stop()
                        
    stop()
    
    

    '''
    
    # first extend the control points to edge of field
    xr = np.append(np.append(0,rdisp[0,0,:]),scene.shape[1]-1)
    yr = np.append(np.append(0,rdisp[1,:,0]),scene.shape[0]-1)
    xxr, yyr = np.meshgrid(yr,xr)

    zxd = np.zeros((yr.shape[0],xr.shape[0]))
    zxd[1:-1,1:-1] = disp[0,:,:]
    zxd[:,0] = 0.
    zxd[:,-1] = scene.shape[1]-1
    zxd[0,:] = zxd[1,:]
    zxd[-1,:] = zxd[-2,:]

    zyd = np.zeros((yr.shape[0],xr.shape[0]))
    zyd[1:-1,1:-1] = disp[1,:,:]
    zyd[:,0]  = zyd[:,1]
    zyd[:,-1] = zyd[:,-2]
    zyd[0,:] = 0.
    zyd[-1,:] = scene.shape[0]-1
 
    zxd_spline = RectBivariateSpline(yr,xr,zxd,kx = 3, ky = 3)
    zyd_spline = RectBivariateSpline(yr,xr,zyd,kx = 3, ky = 3)

    xn = np.arange(scene.shape[1])
    yn = np.arange(scene.shape[0])
    xxn, yyn = np.meshgrid(yn,xn)
        
    zxn_int = zxd_spline.ev(xxn.flatten(),yyn.flatten())
    zxn_int = zxn_int.reshape(scene.shape).T
    
    zyn_int = zyd_spline.ev(xxn.flatten(),yyn.flatten())
    zyn_int = zyn_int.reshape(scene.shape).T

    # Correct the edges 
    #plt.clf(),plt.imshow(zxn_int - xxn,cmap = plt.cm.gray,vmin = -11,vmax =11),plt.pause(0.01)
    #plt.clf(),plt.imshow(zyn_int - yyn,cmap = plt.cm.gray,vmin = -11,vmax =11),plt.pause(0.01)
    
    ans = np.zeros((2,scene.shape[1],scene.shape[0]))
    ans[0,:,:] = zxn_int
    ans[1,:,:] = zyn_int 

    '''
     
    return ans
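For reference, a 1-D sketch of uniform cubic B-spline blending with the standard Foley & Van Dam basis matrix (Ms above appears to store the transpose of this matrix, i.e. the same convention written the other way around):

import numpy as np

# Standard uniform cubic B-spline basis matrix (Foley & Van Dam).
M = np.array([[-1,  3, -3, 1],
              [ 3, -6,  3, 0],
              [-3,  0,  3, 0],
              [ 1,  4,  1, 0]]) / 6.0

def bspline_segment(p0, p1, p2, p3, t):
    # Evaluate the segment controlled by four points at parameter t in [0, 1).
    T = np.array([t**3, t**2, t, 1.0])
    return T @ M @ np.array([p0, p1, p2, p3])

print(bspline_segment(0.0, 1.0, 2.0, 3.0, 0.5))  # 1.5: evenly spaced control values are reproduced exactly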
def calculategeoh(z, lnsp, ts, qs, levels):
    heighttoreturn=np.full([ts.shape[0],ts.shape[1],ts.shape[2]], -999, np.double)
    geotoreturn=np.copy(heighttoreturn)
    Rd = 287.06
    z_h = 0
    #surface pressure
    sp = np.exp(lnsp)
    # A and B parameters to calculate pressures for model levels,
    #  extracted from an ECMWF ERA-Interim GRIB file and then hardcoded here
    # pv =  [
    #   0.0000000000e+000, 2.0000000000e+001, 3.8425338745e+001, 6.3647796631e+001, 9.5636962891e+001,
    #   1.3448330688e+002, 1.8058435059e+002, 2.3477905273e+002, 2.9849584961e+002, 3.7397192383e+002,
    #   4.6461816406e+002, 5.7565112305e+002, 7.1321801758e+002, 8.8366040039e+002, 1.0948347168e+003,
    #   1.3564746094e+003, 1.6806403809e+003, 2.0822739258e+003, 2.5798886719e+003, 3.1964216309e+003,
    #   3.9602915039e+003, 4.9067070313e+003, 6.0180195313e+003, 7.3066328125e+003, 8.7650546875e+003,
    #   1.0376125000e+004, 1.2077445313e+004, 1.3775324219e+004, 1.5379804688e+004, 1.6819472656e+004,
    #   1.8045183594e+004, 1.9027695313e+004, 1.9755109375e+004, 2.0222203125e+004, 2.0429863281e+004,
    #   2.0384480469e+004, 2.0097402344e+004, 1.9584328125e+004, 1.8864750000e+004, 1.7961359375e+004,
    #   1.6899468750e+004, 1.5706449219e+004, 1.4411125000e+004, 1.3043218750e+004, 1.1632757813e+004,
    #   1.0209500000e+004, 8.8023554688e+003, 7.4388046875e+003, 6.1443164063e+003, 4.9417773438e+003,
    #   3.8509133301e+003, 2.8876965332e+003, 2.0637797852e+003, 1.3859125977e+003, 8.5536181641e+002,
    #   4.6733349609e+002, 2.1039389038e+002, 6.5889236450e+001, 7.3677425385e+000, 0.0000000000e+000,
    #   0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
    #   0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
    #   0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
    #   0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
    #   0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
    #   7.5823496445e-005, 4.6139489859e-004, 1.8151560798e-003, 5.0811171532e-003, 1.1142909527e-002,
    #   2.0677875727e-002, 3.4121163189e-002, 5.1690407097e-002, 7.3533833027e-002, 9.9674701691e-002,
    #   1.3002252579e-001, 1.6438430548e-001, 2.0247590542e-001, 2.4393314123e-001, 2.8832298517e-001,
    #   3.3515489101e-001, 3.8389211893e-001, 4.3396294117e-001, 4.8477154970e-001, 5.3570991755e-001,
    #   5.8616840839e-001, 6.3554745913e-001, 6.8326860666e-001, 7.2878581285e-001, 7.7159661055e-001,
    #   8.1125342846e-001, 8.4737491608e-001, 8.7965691090e-001, 9.0788388252e-001, 9.3194031715e-001,
    #   9.5182150602e-001, 9.6764522791e-001, 9.7966271639e-001, 9.8827010393e-001, 9.9401944876e-001,
    #   9.9763011932e-001, 1.0000000000e+000 ]

    # These are simply the A and B parameters appended into one list!
    pv = [0,2.00004,3.980832,7.387186,12.908319,21.413612,33.952858,51.746601,76.167656,108.715561,150.986023,204.637451,271.356506,352.824493,450.685791,566.519226,701.813354,857.945801,1036.166504,1237.585449,1463.16394,1713.709595,1989.87439,2292.155518,2620.898438,2976.302246,3358.425781,3767.196045,4202.416504,4663.776367,5150.859863,5663.15625,6199.839355,6759.727051,7341.469727,7942.92627,8564.624023,9208.305664,9873.560547,10558.88184,11262.48438,11982.66211,12713.89746,13453.22559,14192.00977,14922.68555,15638.05371,16329.56055,16990.62305,17613.28125,18191.0293,18716.96875,19184.54492,19587.51367,19919.79688,20175.39453,20348.91602,20434.1582,20426.21875,20319.01172,20107.03125,19785.35742,19348.77539,18798.82227,18141.29688,17385.5957,16544.58594,15633.56641,14665.64551,13653.21973,12608.38379,11543.16699,10471.31055,9405.222656,8356.25293,7335.164551,6353.920898,5422.802734,4550.21582,3743.464355,3010.146973,2356.202637,1784.854614,1297.656128,895.193542,576.314148,336.772369,162.043427,54.208336,6.575628,0.00316,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000014,0.000055,0.000131,0.000279,0.000548,0.001,0.001701,0.002765,0.004267,0.006322,0.009035,0.012508,0.01686,0.022189,0.02861,0.036227,0.045146,0.055474,0.067316,0.080777,0.095964,0.112979,0.131935,0.152934,0.176091,0.20152,0.229315,0.259554,0.291993,0.326329,0.362203,0.399205,0.436906,0.475016,0.51328,0.551458,0.589317,0.626559,0.662934,0.698224,0.732224,0.764679,0.795385,0.824185,0.85095,0.875518,0.897767,0.917651,0.935157,0.950274,0.963007,0.973466,0.982238,0.989153,0.994204,0.99763,1]


    levelSize=len(levels) #60
    A = pv[0:levelSize+1]
    B = pv[levelSize+1:]
    Ph_levplusone = A[levelSize] + (B[levelSize]*sp)
    #Get a list of level numbers in reverse order
    reversedlevels=np.full(levels.shape[0], -999, np.int32)
    for iLev in list(reversed(range(levels.shape[0]))):
        reversedlevels[levels.shape[0] - 1 - iLev] = levels[iLev]
    #Integrate up into the atmosphere from lowest level
    for lev in reversedlevels:
        #lev is the level number 1-60, we need a corresponding index into ts and qs
        ilevel=np.where(levels==lev)[0][0]
        t_level=np.squeeze(ts[ilevel,:,:])
        q_level=np.squeeze(qs[ilevel,:,:])
        #compute moist temperature
        t_level = t_level * (1.+0.609133*q_level)
        #compute the pressures (on half-levels)
        Ph_lev = A[lev-1] + (B[lev-1] * sp)
        if lev == 1:
            dlogP = np.log(Ph_levplusone/0.1)
            alpha = np.log(2)
        else:
            dlogP = np.log(Ph_levplusone/Ph_lev)
            dP    = Ph_levplusone-Ph_lev
            alpha = 1. - ((Ph_lev/dP)*dlogP)
        TRd = t_level*Rd
        # z_f is the geopotential of this full level
        # integrate from previous (lower) half-level z_h to the full level
        try:
            z_f = z_h + (TRd*alpha)
        except:
            stop()
        #Convert geopotential to height
        heighttoreturn[ilevel,:,:] = z_f / 9.80665
        #Geopotential (add in surface geopotential)
        try:
            geotoreturn[ilevel,:,:] = z_f + z
        except:
            stop()
        # z_h is the geopotential of 'half-levels'
        # integrate z_h to next half level
        z_h=z_h+(TRd*dlogP)
        Ph_levplusone = Ph_lev
    return geotoreturn, heighttoreturn
def calculategeoh(z, lnsp, ts, qs, levels):
    heighttoreturn=np.full([ts.shape[0],ts.shape[1],ts.shape[2],ts.shape[3]], -999, np.double)
    geotoreturn=np.copy(heighttoreturn)
    Rd = 287.06
    z_h = 0
    #surface pressure
    sp = np.exp(lnsp)
    # A and B parameters to calculate pressures for model levels,
    #  extracted from an ECMWF ERA-Interim GRIB file and then hardcoded here
    pv =  [
      0.0000000000e+000, 2.0000000000e+001, 3.8425338745e+001, 6.3647796631e+001, 9.5636962891e+001,
      1.3448330688e+002, 1.8058435059e+002, 2.3477905273e+002, 2.9849584961e+002, 3.7397192383e+002,
      4.6461816406e+002, 5.7565112305e+002, 7.1321801758e+002, 8.8366040039e+002, 1.0948347168e+003,
      1.3564746094e+003, 1.6806403809e+003, 2.0822739258e+003, 2.5798886719e+003, 3.1964216309e+003,
      3.9602915039e+003, 4.9067070313e+003, 6.0180195313e+003, 7.3066328125e+003, 8.7650546875e+003,
      1.0376125000e+004, 1.2077445313e+004, 1.3775324219e+004, 1.5379804688e+004, 1.6819472656e+004,
      1.8045183594e+004, 1.9027695313e+004, 1.9755109375e+004, 2.0222203125e+004, 2.0429863281e+004,
      2.0384480469e+004, 2.0097402344e+004, 1.9584328125e+004, 1.8864750000e+004, 1.7961359375e+004,
      1.6899468750e+004, 1.5706449219e+004, 1.4411125000e+004, 1.3043218750e+004, 1.1632757813e+004,
      1.0209500000e+004, 8.8023554688e+003, 7.4388046875e+003, 6.1443164063e+003, 4.9417773438e+003,
      3.8509133301e+003, 2.8876965332e+003, 2.0637797852e+003, 1.3859125977e+003, 8.5536181641e+002,
      4.6733349609e+002, 2.1039389038e+002, 6.5889236450e+001, 7.3677425385e+000, 0.0000000000e+000,
      0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
      0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
      0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
      0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
      0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000, 0.0000000000e+000,
      7.5823496445e-005, 4.6139489859e-004, 1.8151560798e-003, 5.0811171532e-003, 1.1142909527e-002,
      2.0677875727e-002, 3.4121163189e-002, 5.1690407097e-002, 7.3533833027e-002, 9.9674701691e-002,
      1.3002252579e-001, 1.6438430548e-001, 2.0247590542e-001, 2.4393314123e-001, 2.8832298517e-001,
      3.3515489101e-001, 3.8389211893e-001, 4.3396294117e-001, 4.8477154970e-001, 5.3570991755e-001,
      5.8616840839e-001, 6.3554745913e-001, 6.8326860666e-001, 7.2878581285e-001, 7.7159661055e-001,
      8.1125342846e-001, 8.4737491608e-001, 8.7965691090e-001, 9.0788388252e-001, 9.3194031715e-001,
      9.5182150602e-001, 9.6764522791e-001, 9.7966271639e-001, 9.8827010393e-001, 9.9401944876e-001,
      9.9763011932e-001, 1.0000000000e+000 ]
    levelSize=len(levels) #60
    A = pv[0:levelSize+1]
    B = pv[levelSize+1:]
    Ph_levplusone = A[levelSize] + (B[levelSize]*sp)
    #Get a list of level numbers in reverse order
    reversedlevels=np.full(levels.shape[0], -999, np.int32)
    for iLev in list(reversed(range(levels.shape[0]))):
        reversedlevels[levels.shape[0] - 1 - iLev] = levels[iLev]
    #Integrate up into the atmosphere from lowest level
    for lev in reversedlevels:
        #lev is the level number 1-60, we need a corresponding index into ts and qs
        ilevel=np.where(levels==lev)[0][0]
        t_level=np.squeeze(ts[:,ilevel,:,:])
        q_level=np.squeeze(qs[:,ilevel,:,:])
        #compute moist temperature
        t_level = t_level * (1.+0.609133*q_level)
        #compute the pressures (on half-levels)
        Ph_lev = A[lev-1] + (B[lev-1] * sp)
        if lev == 1:
            dlogP = np.log(Ph_levplusone/0.1)
            alpha = np.log(2)
        else:
            dlogP = np.log(Ph_levplusone/Ph_lev)
            dP    = Ph_levplusone-Ph_lev
            alpha = 1. - ((Ph_lev/dP)*dlogP)
        TRd = t_level*Rd
        # z_f is the geopotential of this full level
        # integrate from previous (lower) half-level z_h to the full level
        z_f = z_h + (TRd*alpha)
        #Convert geopotential to height
        heighttoreturn[:,ilevel] = z_f / 9.80665
        #Geopotential (add in surface geopotential)
        try:
            geotoreturn[:,ilevel] = z_f + z[:,0,:,:]
        except:
            stop()
        # z_h is the geopotential of 'half-levels'
        # integrate z_h to next half level
        z_h=z_h+(TRd*dlogP)
        Ph_levplusone = Ph_lev
    return geotoreturn, heighttoreturn
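Both calculategeoh variants implement the standard ECMWF hydrostatic integration: per layer, the full-level geopotential is z_f = z_h + Rd*Tv*alpha with alpha = 1 - (p_upper/dP)*ln(p_lower/p_upper), and the half-level geopotential advances by z_h += Rd*Tv*ln(p_lower/p_upper). A tiny single-layer numeric sketch (all values made up for illustration):

import numpy as np

Rd = 287.06
Tv = 288.0                            # assumed virtual (moist) temperature of the layer [K]
p_lower, p_upper = 100000.0, 95000.0  # assumed half-level pressures below/above the layer [Pa]

dlogP = np.log(p_lower / p_upper)
dP = p_lower - p_upper
alpha = 1.0 - (p_upper / dP) * dlogP
z_h = 0.0                             # geopotential at the lower half-level (surface here)
z_f = z_h + Rd * Tv * alpha           # geopotential at the full level
print(z_f / 9.80665)                  # full-level height above the surface, about 214 m here
z_h = z_h + Rd * Tv * dlogP           # carried up to the next half-level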
Example no. 15
0
def ReadCESMday(DaySel,
                Exp,
                iWest,
                iEast,
                iSouth,
                iNort,
                rgrTimeCESMFull,
                VARS=None,
                AddCells=0):

    """
    Read in a single day within a region from one
    CESM large ensemble simulation
    All variables necessary for a synoptic map plot are read in
    """

    if VARS is None:
        rgsWTvars=['Z500','U850','V850','TMQ',]
        VarsFullName=  ['Z500','U850','V850','PW']
        rgsWTfolders=['/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/Z500/',\
                      '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/U850/',\
                      '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/V850/',\
                      '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/TMQ/']
    else:
        rgsWTvars=VARS[0]
        VarsFullName=VARS[1]
        rgsWTfolders=VARS[2]
    s20Cname='b.e11.B20TRC5CNBDRD.f09_g16.'
    s21Cname='b.e11.BRCP85C5CNBDRD.f09_g16.'

    # start reading in the CESM data
    iRegionPlus=AddCells # grid cell added around shape rectangle
    ncid=Dataset('/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/PSL/b.e11.B20TRC5CNBDRD.f09_g16.001.cam.h1.PSL.18500101-20051231.nc', mode='r')
    rgrLonWT1D=np.squeeze(ncid.variables['lon'][:])
    rgrLatWT1D=np.squeeze(ncid.variables['lat'][:])
    ncid.close()
    rgrLonS=rgrLonWT1D[iWest-iRegionPlus:iEast+iRegionPlus]
    rgrLatS=rgrLatWT1D[iSouth-iRegionPlus:iNort+iRegionPlus]

    # Read the variables
    DataAll=np.zeros((len(rgrLatS),len(rgrLonS),len(rgsWTvars))); DataAll[:]=np.nan
    for va in range(len(rgsWTvars)):
        if DaySel.year < 2006:
            if Exp  == '001':
                rgrTimeCESM=pd.date_range(datetime.date(1850, 1, 1), end=datetime.date(2005, 12, 31), freq='d')
            else:
                rgrTimeCESM=pd.date_range(datetime.date(1920, 1, 1), end=datetime.date(2005, 12, 31), freq='d')
            Cfiles=glob.glob(rgsWTfolders[va]+'/'+s20Cname+Exp+'*'+rgsWTvars[va]+'*')[0]
        if DaySel.year >= 2006:
            if int(Exp)  >= 34:
                rgrTimeCESM=pd.date_range(datetime.date(2006, 1, 1), end=datetime.date(2100, 12, 31), freq='d')
                Cfiles=glob.glob(rgsWTfolders[va]+'/'+s21Cname+Exp+'*'+rgsWTvars[va]+'*')[0]
            elif DaySel.year <= 2080:
                rgrTimeCESM=pd.date_range(datetime.date(2006, 1, 1), end=datetime.date(2080, 12, 31), freq='d')
                try:
                    Cfiles=np.sort(glob.glob(rgsWTfolders[va]+'/'+s21Cname+Exp+'*'+rgsWTvars[va]+'*'))[0]
                except:
                    stop()
            elif DaySel.year >= 2081:
                rgrTimeCESM=pd.date_range(datetime.date(2081, 1, 1), end=datetime.date(2100, 12, 31), freq='d')
                Cfiles=np.sort(glob.glob(rgsWTfolders[va]+'/'+s21Cname+Exp+'*'+rgsWTvars[va]+'*'))[1]
        rgiNonLeap=np.where((rgrTimeCESM.month != 2) | (rgrTimeCESM.day != 29))[0]
        rgrTimeCESM=rgrTimeCESM[rgiNonLeap]
        iDDselect=np.where(rgrTimeCESM == DaySel)[0][0]

        
        try:
            ncid=Dataset(Cfiles, mode='r')
            DataAll[:,:,va]=np.squeeze(ncid.variables[rgsWTvars[va]][iDDselect,iSouth-iRegionPlus:iNort+iRegionPlus,iWest-iRegionPlus:iEast+iRegionPlus])
            ncid.close()
        except:
            stop()
        

    return DataAll, rgrLonS, rgrLatS
Example no. 16
0
def GaugeDepth(config) :
    '''
    assess the depth of each sample at the given regions
    '''

    startTime = time.clock()
    regionDict = defaultdict(set)
    for item in config.regions:
        if str(str(item).split('.')[-1]).lower() == 'bed':      # this item is a bed file
            regionDict = parseRegionBed(item, regionDict)
        elif str(str(item).split(':')[0]).startswith('chr'):    # this is a string 
            reg_chr = str(item.split(':')[0])
            try:
                reg_str = str(str(item.split(':')[1]).split('-')[0])
                reg_end = str(str(item.split(':')[1]).split('-')[1])
                name = str(reg_chr) + ":" + str(reg_str) + "-" + str(reg_end)
            except IndexError:                                  # represent whole-chromosome regions [ex: chr2] as chrN:1-1E9 in the region dictionary
                reg_str = 1
                reg_end = 1E9
                name = str(reg_chr)
            regionDict[reg_chr].add((reg_str, reg_end, name))
    if not regionDict:
        abortWithMessage("Regions not set!")

    covD = {'chr':[], 'start':[], 'stop':[], 'name':[], 'sample':[],'depth':[]}
    print("\n=== Reading BAM Files ===")
    for sid, fns in config.bams.items():
        # loop over all samples
        for fn in fns:
            # loop over all bam files for this sample
            try:
                samfile = pysam.AlignmentFile(fn, "rb" )
            except ValueError:
                throwWarning("Cannot open file {0}".format(fn))
                continue
            for contig, ROI in regionDict.items():
                for window in ROI:
                    try:
                        bed_name = window[2]
                    except:
                        stop()
                    # make window 0
                    window = [int(window[0]) - 1, int(window[1])]
                    
                    
                    # loop over all ROIs, checking this bam 
                    if config.p:
                        #point method 
                        tmp_dict = {}
                        position = round((window[1] - window[0])/2.0) + window[0]
                        avg_covg = samfile.count(contig, position - 1, position)

                        #for position in range(window[0],window[1]):
                        #    region = str(contig) + ':' + str(position) + '-' + str(position)
                        #    tmp_dict[position] = samfile.count(region=region)
                        #avg_covg = np.mean(tmp_dict.values())
                    elif config.c:
                        #read count method
                        avg_covg = samfile.count(contig, window[0], window[1])
                        #note that "avg_covg" is only a name here - it is the total count of reads, not an average! 
                    else:
                        #complete average method
                        #'''
                        tmp_dict = {}
                        for position in range(window[0],window[1]):
                            tmp_dict[position] = 0
                        for pileupcolumn in samfile.pileup(contig, window[0], window[1],stepper='all'):
                            #loop over reads that hit the window locations and record coverage 
                            # 'stepper = all' yields mapped, primary, non-duplicate (identified by sam flag), QC pass reads
                            try:
                                tmp_dict[pileupcolumn.pos]
                                tmp_dict[pileupcolumn.pos] = pileupcolumn.n       
                            except:
                                #skip this position if it's not in the region dict
                                continue
                        avg_covg = np.mean(list(tmp_dict.values()))  # list() so np.mean works on Python 3 dict views
                        #'''
                        '''
                        #this behaves erratically and does not produce the same number if run repeatedly
                        #   not sure how to use the function, but it could be faster than pileup
                        counter = 0
                        for ct_cov in samfile.count_coverage(contig, window[0], window[1], read_callback = 'all'):
                            for nt_arr in ct_cov:
                                counter += int(nt_arr)
                        stop()
                        avg_covg = counter/float(window[1] - window[0])
                        '''


                    covD['chr'].append(str(contig))
                    covD['start'].append(int(window[0]) + 1)
                    covD['stop'].append(int(window[1]))
                    covD['name'].append(str(bed_name))
                    covD['sample'].append(str(sid))
                    covD['depth'].append(float(avg_covg))     
            samfile.close()
            totalTime = time.clock() - startTime
            print("{0:02d}:{1:02d}\t{2}".format(int(totalTime/60), int(totalTime % 60), fn))
    covDF = pd.DataFrame.from_dict(covD)[['chr','start','stop','name','sample','depth']]
    covDF = covDF.groupby(['chr','start','stop','name','sample'])['depth'].apply(sum).reset_index()
            
    totalTime = time.clock() - startTime
    print("\n{0:02d}:{1:02d}\t{2}".format(int(totalTime/60), int(totalTime % 60), "Done"))
    return covDF
Example no. 17
0
import noddy2
from pdb import set_trace as stop

if __name__ == '__main__':
	nn = noddy2.Noddy()
	aa = nn.session_id
	stop()