Example #1
0
    def __init__(self, msg_folder, y1=y1, y2=y2, months=None):
        """Index MSG raw binary files under *msg_folder* for years y1..y2.

        months: optional list of months; None selects all 12, a
        single-element list selects exactly that month.
        Sets self.lat/lon/nx/ny from the lon.npz lat/lon grid and
        self.fpath to the naturally-sorted list of .gra files found.
        """
        yrange = range(y1, y2 + 1)  # 1998, 2014
        if months is None:
            mrange = range(1, 13)
        elif len(months) > 1:
            mrange = months
        else:
            mrange = range(months[0], months[0] + 1)

        try:
            lpath = uarr.locate('lon.npz', msg_folder, exclude=None)
        except OSError:  # narrowed from a bare except; locate raises OSError (see loop below)
            print('Not a directory or no msg lat/lon found')
            return

        mpath = os.path.join(msg_folder, 'msg_raw_binary')

        # BUG FIX: os.path.isdir() returns a bool and never raises, so the
        # original try/except around it was dead code and a missing
        # directory went undetected.  Test the boolean result instead.
        if not os.path.isdir(mpath):
            print('No msg_raw_binary')
            quit()

        rfiles = []
        for yr, mo in itertools.product(
                yrange, mrange):  # rain_f4 files only available for 6 to 10

            filepath = os.path.join(mpath, str(yr), str(mo).zfill(2))
            try:
                files = uarr.locate('.gra', filepath, exclude='_182')
            except OSError:
                continue  # this year/month directory does not exist

            rfiles.extend(files)

        rfiles.sort(key=ul.natural_keys)

        msg_latlon = np.load(lpath[0])
        mlon = msg_latlon['lon']
        mlat = msg_latlon['lat']

        self.lat = mlat
        self.lon = mlon
        self.nx = mlon.shape[1]  # number of grid columns
        self.ny = mlon.shape[0]  # number of grid rows

        self.years = os.listdir(mpath)
        self.root = msg_folder
        self.fpath = rfiles
Example #2
0
    def __init__(self, msg_folder, y1=y1, y2=y2, months=None):
        """Build the file index of MSG raw binary data for years y1..y2.

        months: list of months to include; None means all 12, a
        one-element list selects just that month.
        """
        yrange = range(y1, y2+1)  # 1998, 2014
        if months is None:
            mrange = range(1,13)
        elif len(months) > 1:
            mrange = months
        else:
            mrange = range(months[0],months[0]+1)

        try:
            lpath = uarr.locate('lon.npz', msg_folder, exclude = None)
        except OSError:  # narrowed from a bare except
            print('Not a directory or no msg lat/lon found')
            return

        mpath = os.path.join(msg_folder, 'msg_raw_binary')

        # BUG FIX: os.path.isdir() never raises -- the original
        # try/except here could not fire, so a missing directory was
        # silently ignored.  Check the returned bool instead.
        if not os.path.isdir(mpath):
            print('No msg_raw_binary')
            quit()

        rfiles = []
        for yr, mo in itertools.product(yrange, mrange):  # rain_f4 files only available for 6 to 10

            filepath = os.path.join(mpath, str(yr), str(mo).zfill(2))
            try:
                files = uarr.locate('.gra', filepath, exclude = '_182')
            except OSError:
                continue  # year/month folder missing

            rfiles.extend(files)

        rfiles.sort(key=ul.natural_keys)

        msg_latlon = np.load(lpath[0])
        mlon = msg_latlon['lon']
        mlat = msg_latlon['lat']

        self.lat = mlat
        self.lon = mlon
        self.nx = mlon.shape[1]
        self.ny = mlon.shape[0]

        self.years = os.listdir(mpath)
        self.root = msg_folder
        self.fpath = rfiles
def composite():
    """Group MCS kernels by wavelet scale and pickle the bulk statistics.

    Runs `file_loop` over all MCS netCDF files in parallel; each record
    holds a precip kernel, a temperature kernel, the scale and ~30 bulk
    statistics.  The statistics are appended column-wise into `dic`,
    which is pickled.
    """
    pool = multiprocessing.Pool(processes=7)
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E_size_zR/')   # /WA30/
    out = '/users/global/cornkle/papers/wavelet/saves/pandas/'
    #files = files[0:1500]
    print('Nb files', len(files))
    tt = 'WA15'

    comp_collect = {}
    precip = {}

    results = pool.map(file_loop, files)
    pool.close()
    results = [r for r in results if r is not None]

    nb_sys = len(results)
    print('Number systems: ', nb_sys)

    # flatten the per-file lists into one list of records
    flat = [entry for per_file in results for entry in per_file]

    # one bucket per wavelet scale (entry[2])
    for entry in flat:
        comp_collect[entry[2]] = {'p': [], 't': [], 'scale': [], 'hour': [], 'id': []}
        precip[entry[2]] = []

    stat_names = ['scale', 'id', 'hour',
                  'clat', 'clon', 'lat_min', 'lat_max', 'lon_min', 'lon_max', 'area',
                  'bulk_pmax', 'bulk_pmean', 'bulk_tmean', 'bulk_tmean_p', 'bulk_tmin_p', 'bulk_g30',
                  'circle_pix', 'circle_Tcentre', 'circle_p', 'circle_t', 'circle_val', 'circle_sum',
                  'circle_nz', 'circle_g30', 'circle_max', 'circle_p99', 'circle_p95', 'circle_p90',
                  'circle_val_all', 'circle_pc']
    dic = OrderedDict((name, []) for name in stat_names)

    keys = comp_collect.keys()
    print(keys)

    for entry in flat:

        print(entry[2])

        bucket = comp_collect[entry[2]]
        bucket['p'].append(entry[0])
        bucket['t'].append(entry[1])
        bucket['hour'].append(entry[4])
        bucket['id'].append(entry[3])

        # entry[2:] lines up with dic's keys; positions 0/1 (kernel,
        # kernelt) are skipped
        for offset, name in enumerate(dic.keys()):
            dic[name].append(entry[offset + 2])

        precip[entry[2]].extend(entry[20])


    pkl.dump(dic, open(out+'3dmax_gt15000_lax_nonan_dominant_fulldomain.p','wb'))
Example #4
0
def run():
    """Run file_loop over the VERA netCDF files and write blob tables.

    Produces one pickle with the full per-blob arrays and two CSVs with
    per-cluster and per-pixel summaries.
    """
    files = ua.locate(".nc", '/users/global/cornkle/VERA')
    pool = multiprocessing.Pool(processes=4)
    out = '/users/global/cornkle/VERA/blobs/'

    res = pool.map(file_loop, files)
    pool.close()
    res = [x for x in res if x is not None]

    nb_sys = len(res)
    print('Number systems: ', nb_sys)

    res = [item for sublist in res for item in sublist]  # flatten list of lists

    # file_loop record layout: blon, blat, blon_c, blat_c, p, pmax, pmean, hour, area
    dic = OrderedDict([('lons', []), ('lats', []), ('lon_c', []), ('lat_c', []),
                       ('p', []), ('pmax', []), ('pmean', []), ('hour', []), ('area', [])])

    dic2 = OrderedDict([('lon_centre', []), ('lat_centre', []),
                        ('pmax', []), ('pmean', []), ('hour', []), ('area', [])])

    dic3 = {'precip_pixel': []}

    # record positions feeding dic2 (skips the per-pixel fields 0, 1 and 4)
    pick = [2, 3, 5, 6, 7, 8]

    for v in res:
        for cnt, kk in enumerate(dic.keys()):
            dic[kk].append(v[cnt])

    for v in res:
        # BUG FIX: the original also tested `cnt in (0, 1, 4)` inside this
        # loop, but pick never contains those values, so the guard was
        # unreachable dead code and has been removed.
        for cnt, kk in zip(pick, dic2.keys()):
            dic2[kk].append(v[cnt])
        dic3['precip_pixel'].extend(v[4])

    # context manager so the pickle file handle is closed deterministically
    # (the original leaked the handle returned by open())
    with open(out + 'trmm_blobs_1000km2.p', 'wb') as fh:
        pkl.dump(dic, fh)

    df = pd.DataFrame.from_dict(dic2)
    df.to_csv(out + 'trmm_cluster.csv')

    df = pd.DataFrame.from_dict(dic3)
    df.to_csv(out + 'trmm_pixel.csv')
Example #5
0
def rewriteBigcellTab():
    """Re-export every bigcell area table as cell_40c_HHMM_JJAS.txt.

    Parses each *.txt table under `path` with parseCellTables and writes
    it into the rewrite/ subfolder, named after the hour and minute of
    the first row's Date.
    """
    import glob  # local import: only needed for the cleanup below

    path = "/users/global/cornkle/data/OBS/meteosat_WA30/bigcell_area_table/"
    out = path + 'rewrite/'
    print(out)
    # Replaces `os.system('rm ' + out + '*.txt')`: no shell subprocess,
    # and no error output when the folder holds no previous files.
    for old in glob.glob(out + '*.txt'):
        os.remove(old)
    ok = uarr.locate("*.txt", path)

    for a in ok:
        print('Doing ' + a)
        tab = parseCellTables(a)
        minute = tab["Date"][0].minute
        hour = tab["Date"][0].hour
        tab.to_csv(out + 'cell_40c_' + str(hour).zfill(2) + str(minute).zfill(2) + '_JJAS.txt')
Example #6
0
def rewriteBigcellTab():
    """Rewrite each bigcell area table under a time-stamped filename."""
    path = "/users/global/cornkle/data/OBS/meteosat_WA30/bigcell_area_table/"
    out = path + 'rewrite/'
    print(out)
    # clear any previous output files
    os.system('rm ' + out + '*.txt')
    ok = uarr.locate("*.txt", path)

    for a in ok:
        print('Doing ' + a)
        tab = parseCellTables(a)
        first_date = tab["Date"][0]
        stamp = str(first_date.hour).zfill(2) + str(first_date.minute).zfill(2)
        tab.to_csv(out + 'cell_40c_' + stamp + '_JJAS.txt')
Example #7
0
def perSys():
    """Collect per-system bulk statistics from MCS files into one pickle.

    Maps `file_loop` over the MCS netCDF files in parallel and appends
    each returned record's fields into `mdic`, which is then dumped.
    """
    pool = multiprocessing.Pool(processes=4)
    tthresh = '-40'
    files = ua.locate(
        ".nc", '/users/global/cornkle/MCSfiles/WA5000_4-8N_13W-13E_' +
        tthresh + '_18UTC/')

    print('Nb files', len(files))
    mdic = dictionary()  # project helper, presumably defaultdict(list)-like
    res = pool.map(file_loop, files)
    pool.close()

    field_names = mdic.keys()
    for system in res:
        for field in field_names:
            try:
                mdic[field].append(system[field])
            except TypeError:
                # file_loop returned None for this file -- skip it
                continue

    pkl.dump(
        mdic,
        open(
            '/users/global/cornkle/data/CLOVER/saves/bulk_' + tthresh +
            '_zeroRain_gt5k_-40thresh_OBSera.p', 'wb'))
Example #8
0
def save():
    """Compute wavelet power at one scale per Amazon file and save as netCDF.

    Each input file is transformed with `run`; the plane at `scale_id`
    and the forest fraction are stacked along time and written out.
    """
    files = ua.locate(".nc", '/users/global/cornkle/data/Amazon')
    # files = files[6:7]

    years = len(files)  # kept for parity with the original (unused)
    yy = np.arange(1984, 2009, 4)

    scale_id = 7

    ylist = []
    vlist = []

    for i, f in enumerate(files):
        y = yy[i]  # kept for parity with the original (unused)

        print('Doing ' + f)

        array = xr.open_dataset(f)

        wl, scales, forest = run(array)

        print(scales)

        wwl = wl[scale_id]

        coord_spec = dict(coords=[array.time, array.lat, array.lon],
                          dims=['time', 'lat', 'lon'])
        ylist.append(xr.DataArray(wwl[np.newaxis, ...], **coord_spec))
        vlist.append(xr.DataArray(forest[np.newaxis, ...], **coord_spec))

    yarr = xr.concat(ylist, dim='time')
    veg = xr.concat(vlist, dim='time')

    sc = int(scales[scale_id] / 1000.)

    xarr = xr.Dataset()
    xarr['vegfra'] = veg
    xarr['wav'] = yarr

    target = '/users/global/cornkle/amazon/nc/rhod_' + str(sc) + 'kmt.nc'
    xarr.to_netcdf(target)
    print('Saved ' + target)
Example #9
0
def perSys():
    """Collect bulk per-system statistics from MCS files into one pickle.

    Maps `file_loop` over all MCS netCDF files in parallel and appends
    each returned record's fields to `mdic` (a project `dictionary()` of
    lists), then dumps the result under data/CLOVER/saves/.
    """

    pool = multiprocessing.Pool(processes=4)
    tthresh = '-40'  # temperature-threshold tag used in the input/output paths
    files = ua.locate(".nc", cnst.network_data + 'MCSfiles/WA5000_4-8N_13W-13E_'+tthresh+'_18UTC/')

    print('Nb files', len(files))
    mdic = dictionary() #defaultdict(list)
    res = pool.map(file_loop, files)
    pool.close()
    #
    #
    # for f in files:
    #     file_loop(f)

    #
    #res = [item for sublist in res for item in sublist]  # flatten list of lists

    keys = mdic.keys()
    for v in res:
        for k in keys:
            try:
                # v is None when file_loop skipped the file; indexing None
                # raises TypeError and the record is ignored
                mdic[k].append(v[k])
            except TypeError:
                continue

        # if v[2]*25 > 1000000:
        #     tplt = v[9]
        #     tplt[np.where(tplt==np.nan)]=0
            # f = plt.figure()
            # ax = plt.axes(projection=ccrs.PlateCarree())
            # plt.contourf(v[10], v[11], tplt, transform=ccrs.PlateCarree())
            # ax.coastlines()
            # plt.colorbar()
            # ax.add_feature(cartopy.feature.BORDERS, linestyle='--')


    # f = plt.figure()
    # siz = 3
    #
    # ax = f.add_subplot(1, 1, 1)
    # plt.scatter(mdic['tmin'], mdic['pmax'])
    # plt.title('bulk', fontsize=9)


    pkl.dump(mdic, open(cnst.network_data + 'data/CLOVER/saves/bulk_'+tthresh+'_zeroRain_gt5k_-40thresh_OBSera_thicklayer.p',
                           'wb'))
Example #10
0
def save():
    """Compute wavelet power at one scale per Amazon file and save as netCDF.

    Each input file is transformed with `run`; the plane at `scale_id`
    and the forest fraction are stacked along time and written out.
    """

    files = ua.locate(".nc", '/users/global/cornkle/data/Amazon')
   # files = files[6:7]

    years = len(files)  # unused -- kept from the original
    yy = np.arange(1984, 2009, 4)

    scale_id = 7  # index into the returned `scales` array

    ylist = []
    vlist = []

    for i, f in enumerate(files):
        y = yy[i]  # unused -- kept from the original

        print('Doing ' +f)

        array = xr.open_dataset(f)

        wl, scales, forest = run(array)

        print(scales)

        wwl = wl[scale_id]

        # np.newaxis adds a length-1 leading 'time' dimension
        yarr = xr.DataArray(wwl[np.newaxis,...], coords=[array.time, array.lat, array.lon], dims=['time', 'lat', 'lon'])
        veg = xr.DataArray(forest[np.newaxis,...], coords=[array.time, array.lat, array.lon], dims=['time', 'lat', 'lon'])

        ylist.append(yarr)
        vlist.append(veg)

    yarr = xr.concat(ylist, dim='time')
    veg = xr.concat(vlist, dim='time')


    sc = int(scales[scale_id]/1000.)  # scale in km for the output filename

    xarr = xr.Dataset()
    xarr['vegfra']=veg
    xarr['wav']=yarr

    xarr.to_netcdf('/users/global/cornkle/amazon/nc/rhod_'+str(sc)+'kmt.nc')
    print('Saved '+'/users/global/cornkle/amazon/nc/rhod_'+str(sc)+'kmt.nc')
Example #11
0
def run():
    """Aggregate per-system MCS records from file_loop and pickle them."""
    pool = multiprocessing.Pool(processes=7)
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E/')   # /WA30/
    out = '/users/global/cornkle/C_paper/chris2016/'
    #files = files[0:1000]
    print('Nb files', len(files))

    results = pool.map(file_loop, files)
    pool.close()
    results = [r for r in results if r is not None]

    nb_sys = len(results)
    print('Number systems: ', nb_sys)

    # flatten list of lists
    flat = [record for per_file in results for record in per_file]

    dic = {'year' : [], 'month' : [], 'hour' : [], 'precip':[], 'sum30' : [],  'sum20' :[],  'sum':[], 'valid':[],
           'nz':[], 'clon' : [], 'clat' : [], 'cent': [] }  #  big , fin,shape, sum, sumvalid, tmin

    # record positions 0..11 in this order.  NOTE(review): 'clat' takes
    # record[9] and 'clon' record[10] although the dict literal lists clon
    # first -- this matches the original appends; confirm it is intentional.
    append_order = ('year', 'month', 'hour', 'precip', 'sum30', 'sum20',
                    'sum', 'valid', 'nz', 'clat', 'clon', 'cent')

    for record in flat:
        for idx, name in enumerate(append_order):
            dic[name].append(record[idx])


    #df = pd.DataFrame(dic)
    #df.to_pickle(out+'3dmax_gt15000_fakeprecip_-70.pkl')

    pkl.dump(dic, open(out+'chris_mcs_-40_gt1000.p',
                           'wb'))
Example #12
0
def perSys():
    """Per-year bulk statistics for the CP4 future runs, one pickle per year.

    Filters the file list by year, runs file_loop serially over each
    year's files, appends every record's fields to `mdic` and dumps it.
    """
    tthresh = '-50'
    files = ua.locate(
        ".nc",
        '/media/ck/Elements/Africa/WestAfrica/CP4/CP25_16-19UTC_future_5000km2_-50C_TCWV'
    )  #CP25_-50C_5000km2
    print('Nb files', len(files))
    for y in range(2000, 2007):

        # files whose path contains the year string
        yfiles = [f for f in files if str(y) in f]

        mdic = dictionary()  #defaultdict(list)
        print('Yearly files', len(yfiles))

        # BUG FIX: the original created a multiprocessing.Pool here on every
        # year that was never used (the pool.map call was commented out) and
        # never closed, leaking worker processes; it also left an
        # ipdb.set_trace() breakpoint active.  Both removed.
        res = []
        for f in yfiles:
            res.append(file_loop(f))

        keys = mdic.keys()
        for v in res:
            for k in keys:
                try:
                    # v is None when file_loop skipped the file
                    mdic[k].append(v[k])
                except TypeError:
                    continue

        # with-statement closes the pickle handle (original leaked it)
        with open(
                cnst.network_data + 'data/CLOVER/saves/bulk_' + tthresh +
                '_5000km2_P25means_hourly_SAHEL_15kmprecip_WA_5-20N_-50C_TCWV_fut_'
                + str(y) + '.p', 'wb') as fh:
            pkl.dump(mdic, fh)
        print('Saved file')
Example #13
0
def minmax():
    """Scatter-plot value pairs from file_loop, split by record type tag."""
    pool = multiprocessing.Pool(processes=5)
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA30/')
    print('Nb files', len(files))

    res = pool.map(file_loop, files)
    pool.close()

    # flatten list of lists
    res = [record for per_file in res for record in per_file]
    print('test')

    temp, ptemp = [], []
    grad, pgrad = [], []

    # records are tagged 't' (temperature) or 'g' (gradient)
    for record in res:
        tag = record[0]
        if tag == 't':
            temp.append(record[1])
            ptemp.append(record[2])
        if tag == 'g':
            grad.append(record[1])
            pgrad.append(record[2])

    f = plt.figure()

    siz = 3  # kept from the original (unused)

    ax = f.add_subplot(1, 2, 1)
    plt.scatter(temp, ptemp)
    plt.title('temp', fontsize=9)

    ax = f.add_subplot(1, 2, 2)
    plt.scatter(grad, pgrad)
    plt.title('grad', fontsize=9)
Example #14
0
def minmax():
    """Scatter-plot value pairs from file_loop, split by record type tag."""

    pool = multiprocessing.Pool(processes=5)
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA30/')
    print('Nb files', len(files))

    res = pool.map(file_loop, files)
    pool.close()
    #
    res = [item for sublist in res for item in sublist]  # flatten list of lists
    print('test')
    #
    temp = []
    ptemp = []
    grad = []
    pgrad = []

    # records are tagged 't' (temperature) or 'g' (gradient) in position 0
    for v in res:
        if v[0] == 't':
            temp.append(v[1])
            ptemp.append(v[2])
        if v[0] == 'g':
            grad.append(v[1])
            pgrad.append(v[2])

    f = plt.figure()


    siz = 3  # unused -- kept from the original

    ax = f.add_subplot(1, 2, 1)
    plt.scatter(temp, ptemp)
    plt.title('temp', fontsize=9)

    ax = f.add_subplot(1, 2, 2)
    plt.scatter(grad, pgrad)
    plt.title('grad', fontsize=9)
Example #15
0
def readMCS_getWavelet_tresh():
    """Wavelet-transform thresholded MCS fields and pickle the collection.

    Every MCS netCDF file provides five cloud-top temperature lag fields
    (tc_lag0..tc_lag3, tc_lagx) plus precipitation 'p'.  Each field is
    cleaned with a per-file percentile/threshold fill, run through
    util.waveletTP, and the coefficient planes at the scales in `ext`
    are accumulated in `wave`, which is pickled at the end.
    """

    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')

#    arr=np.array([15,   16,   17,   18,   19,   20,   21,   22,   24,
#         25,   27,   28,   30,   32,   34,   36,   38,   40,
#         42,   45,   48,   50,   53,   57,   60,   64,   67,
#         71,   76,   80,   85,   90,   95,  101,  107,  113,
#        120,  127,  135,  143,  151,  160,  170,  180,  190,  202], dtype=str)

    # full list of wavelet scales, as strings so they can serve as dict keys
    arr=np.array([10, 11, 11, 12,13, 13, 14,15,   16,   17,   18,   19,   20,   21,   22,   24,
         25,   27,   28,   30,   32,   34,   36,   38,   40,
         42,   45,   48,   50,   53,   57,   60], dtype=str)

    # subset of scales that is actually stored
    ext=np.array([10,15,20,30,60], dtype=str)
    rrange=list(range(arr.size))
    scales=np.in1d(arr, ext)        # mask: which positions of arr are kept
    rpos=np.array(rrange)[scales]   # index positions of the kept scales

    print(rpos)

    # per-file scalar summaries and the full original fields
    wave={}
    wave['torig0_min']=[]
    wave['torig1_min']=[]
    wave['torig2_min']=[]
    wave['torig3_min']=[]
    wave['torigx_min']=[]
    wave['porig_max']=[]

    wave['torig0']=[]
    wave['torig1']=[]
    wave['torig2']=[]
    wave['torig3']=[]
    wave['torigx']=[]
    wave['porig']=[]

    # one sub-dict per stored scale, holding wavelet planes and their maxima
    for a in ext:
        wave[a]={}

        wave[a]['twavelet0']=[]
        wave[a]['twavelet1']=[]
        wave[a]['twavelet2']=[]
        wave[a]['twavelet3']=[]
        wave[a]['twaveletx']=[]
        wave[a]['pwavelet']=[]

        wave[a]['twavelet0_max']=[]
        wave[a]['twavelet1_max']=[]
        wave[a]['twavelet2_max']=[]
        wave[a]['twavelet3_max']=[]
        wave[a]['twaveletx_max']=[]
        wave[a]['pwavelet_max']=[]

 #   return wave

    for f in files:
        print('Doing file: '+f)
        dic = xr.open_dataset(f)

     #   if (dic['time.hour'].values<15) or (dic['time.hour'].values>21):
          # print('smaller')
     #      continue

        outt0=np.array(dic['tc_lag0'].values.copy())
        outt1=np.array(dic['tc_lag1'].values.copy())
        outt2=np.array(dic['tc_lag2'].values.copy())
        outt3=np.array(dic['tc_lag3'].values.copy())
        outtx=np.array(dic['tc_lagx'].values.copy())

        # fill value: 30th percentile of the finite lag-0 temperatures
        mmeans=np.percentile(outt0[np.isfinite(outt0)], 30)

        print(mmeans)

        maxi=np.nanmin(outt0)   # coldest lag-0 pixel
        thresh=maxi+15          # keep only pixels within 15 of the minimum

        outp=np.array(dic['p'].values.copy())

        # tiny negative so NaN rain never counts as > 0 below
        outp[np.isnan(outp)]=-10**-5

        # replace NaNs and too-warm pixels by the fill value in every lag
        outt0[np.isnan(outt0)]=mmeans   # -40
        outt0[outt0>thresh]=mmeans
        outt1[np.isnan(outt1)]=mmeans
        outt1[outt1>thresh]=mmeans
        outt2[np.isnan(outt2)]=mmeans
        outt2[outt2>thresh]=mmeans
        outt3[np.isnan(outt3)]=mmeans
        outt3[outt3>thresh]=mmeans
        outtx[np.isnan(outtx)]=mmeans
        outtx[outtx>thresh]=mmeans
    #    outt2[outt2>thresh]=-40#mmeans

        # skip files where any lag field became entirely fill value
        if np.mean(outt0)==mmeans:
            continue
        if np.mean(outt1)==mmeans:
            continue
        if np.mean(outt2)==mmeans:
            continue
        if np.mean(outt3)==mmeans:
            continue
        if np.mean(outtx)==mmeans:
            continue

        # skip files with no pixel colder than the fill value at lag x
        if not outtx[outtx<mmeans].any():
            continue

        print('Wavelet start')

        wav0 = util.waveletTP(outt0, outp, 5)

        wav1 = util.waveletTP(outt1, outp, 5)

        wav2 = util.waveletTP(outt2, outp, 5)

        wav3 = util.waveletTP(outt3, outp, 5)

        wavx = util.waveletTP(outtx, outp, 5)

        #print(wav1['scales'])


      #  outt0[np.where(dic['pmask'].values==0)]=mmeans
      #  outt1[np.where(dic['pmask'].values==0)]=mmeans
      #  outt2[np.where(dic['pmask'].values==0)]=mmeans
     #   outt3[np.where(dic['pmask'].values==0)]=mmeans
     #   outtx[np.where(dic['pmask'].values==0)]=mmeans

        # per-file extremes: 1st percentile of cold pixels, 99th of rain > 1
        wave['torig0_min'].append(np.percentile(outt0[outt0<mmeans], 1))
        wave['torig1_min'].append(np.percentile(outt1[outt1<mmeans], 1))
        wave['torig2_min'].append(np.percentile(outt2[outt2<mmeans], 1))
        wave['torig3_min'].append(np.percentile(outt3[outt3<mmeans], 1))
        wave['torigx_min'].append(np.percentile(outtx[outtx<mmeans], 1))
        wave['porig_max'].append(np.percentile(outp[outp>1], 99))

        wave['torig0'].append(outt0)
        wave['torig1'].append(outt1)
        wave['torig2'].append(outt2)
        wave['torig3'].append(outt3)
        wave['torigx'].append(outtx)
        wave['porig'].append(outp)


        for pos in rpos:

            print(arr[pos])
            # coefficient planes at this scale for every lag and for precip
            tt0=np.array(wav0['t'][pos,:,:])
            tt1=np.array(wav1['t'][pos,:,:])
            tt2=np.array(wav2['t'][pos,:,:])
            tt3=np.array(wav3['t'][pos,:,:])
            ttx=np.array(wavx['t'][pos,:,:])
            pp=np.array(wav2['p'][pos,:,:])

            # zero out pixels outside the precipitation mask
            tt0[np.where(dic['pmask'].values==0)]=0
            tt1[np.where(dic['pmask'].values==0)]=0
            tt2[np.where(dic['pmask'].values==0)]=0
            tt3[np.where(dic['pmask'].values==0)]=0
            ttx[np.where(dic['pmask'].values==0)]=0

            wave[arr[pos]]['twavelet0'].append(tt0)
            wave[arr[pos]]['twavelet1'].append(tt1)
            wave[arr[pos]]['twavelet2'].append(tt2)
            wave[arr[pos]]['twavelet3'].append(tt3)
            wave[arr[pos]]['twaveletx'].append(ttx)
            wave[arr[pos]]['pwavelet'].append(pp)

            # 99th percentile of the positive coefficients at this scale
            wave[arr[pos]]['twavelet0_max'].append(np.percentile(tt0[tt0>0], 99))
            wave[arr[pos]]['twavelet1_max'].append(np.percentile(tt1[tt1>0], 99))
            wave[arr[pos]]['twavelet2_max'].append(np.percentile(tt2[tt2>0], 99))
            wave[arr[pos]]['twavelet3_max'].append(np.percentile(tt3[tt3>0], 99))
            wave[arr[pos]]['twaveletx_max'].append(np.percentile(ttx[ttx>0], 99))
            wave[arr[pos]]['pwavelet_max'].append(np.percentile(pp[pp>0], 99))

    pkl.dump(wave, open('/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_perc_thresh.p', 'wb'))

    print('Saved!')
import pandas as pd
import multiprocessing
import pickle as pkl
from scipy.ndimage.measurements import label
import pdb


# Fraction of heavy rain attributable to MCSs: compare the count of
# >= 30 pixels from the pickled bulk MCS statistics with the count from
# the raw TRMM files over a similar lat/lon box.
dic = pkl.load( open ('/users/global/cornkle/C_paper/wavelet/saves/bulk_40big_zR.p', 'rb'))

p30 = np.array(dic['po30'])   # per-system count of pixels >= 30
lat = np.array(dic['clat'])   # per-system centre latitude
mcs_count = np.sum(p30[(lat >= 4) & (lat<=7.5)])



files = ua.locate(".nc", '/users/global/cornkle/TRMMfiles')
cnt = 0
for f in files:
    print('Doing ', f)
    xa = xr.open_dataset(f)

    lat = xa.lat.values
    lon = xa.lon.values
    arr = xa['p'].values
    # NOTE(review): the latitude cut here is 4..7.8 while the MCS cut above
    # uses 4..7.5 -- confirm whether this mismatch is intentional.
    arr = arr[(lat>=4) & (lat <= 7.8) & (lon >=-17) & (lon <=20)]
    nb = np.sum(arr >= 30)   # heavy-rain pixels in this file's box

    cnt += nb


print('MCS frac', mcs_count/cnt)
def composite():
    """Build per-scale composites of MCS precip/temperature and pickle them.

    Maps `file_loop` over all MCS netCDF files in parallel; each record
    carries a precip kernel, a temperature kernel, the wavelet scale and
    ~30 bulk/circle statistics.  Kernels are grouped by scale in
    `comp_collect`, statistics go column-wise into `dic`, circle precip
    values into `precip`; all three dicts are pickled.
    """
    pool = multiprocessing.Pool(processes=7)
    files = ua.locate(
        ".nc", '/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E_size_zR/'
    )  # /WA30/
    out = '/users/global/cornkle/C_paper/wavelet/saves/pandas/'
    #files = files[0:400]
    print('Nb files', len(files))
    tt = 'WA15'

    comp_collect = {}
    precip = {}

    res = pool.map(file_loop, files)
    pool.close()
    # drop files that file_loop skipped
    res = [x for x in res if x is not None]

    nb_sys = len(res)

    print('Number systems: ', nb_sys)

    res = [item for sublist in res
           for item in sublist]  # flatten list of lists

    # one bucket per wavelet scale (record position 2)
    for v in res:

        comp_collect[v[2]] = {
            'p': [],
            't': [],
            'scale': [],
            'hour': [],
            'id': []
        }
        precip[v[2]] = []

    # ret.append((kernel, kernelt, sc, id, dic['time.hour'].values.tolist(),
    #             clat, clon, lat_min, lat_max, lon_min, lon_max, area,
    #             bulk_pmax, bulk_pmean, bulk_tmean, bulk_tmean_p, bulk_tmin_p, bulk_g30,
    #             circle_Tcenter, circle_p, circle_t, circle_valid, circle_sum,
    #             circle_nz, circle_g30, circle_max, circle_p99, circle_p95, circle_p90))

    dic = OrderedDict([('scale', []), ('id', []), ('hour', []), ('clat', []),
                       ('clon', []), ('lat_min', []), ('lat_max', []),
                       ('lon_min', []), ('lon_max', []), ('area', []),
                       ('bulk_pmax', []),
                       ('bulk_pmean', []), ('bulk_tmean', []),
                       ('bulk_tmean_p', []), ('bulk_tmin_p', []),
                       ('bulk_g30', []), ('circle_pix', []),
                       ('circle_Tcentre', []), ('circle_p', []),
                       ('circle_t', []), ('circle_val', []),
                       ('circle_sum', []), ('circle_nz', []),
                       ('circle_g30', []), ('circle_max', []),
                       ('circle_p99', []), ('circle_p95', []),
                       ('circle_p90', []), ('circle_val_all', []),
                       ('circle_pc', [])])

    keys = comp_collect.keys()
    print(keys)

    for v in res:

        print(v[2])

        comp_collect[v[2]]['p'].append(v[0])
        comp_collect[v[2]]['t'].append(v[1])
        comp_collect[v[2]]['hour'].append(v[4])
        comp_collect[v[2]]['id'].append(v[3])

        # v[2:] lines up with dic's keys in declaration order
        for cnt, kk in enumerate(dic.keys()):

            dic[kk].append(v[cnt + 2])  # omit kernel and kernelt

        precip[v[2]].extend(v[20])

    pkl.dump(dic, open(out + '3dmax_gt15000_-60.p', 'wb'))

    pkl.dump(precip, open(out + 'precip_3dmax_gt15000_-60.p', 'wb'))

    pkl.dump(comp_collect, open(out + 'comp_collect_composite_-60.p', 'wb'))
Example #18
0
def readMCS_getWavelet():
    """Wavelet-transform all MCS files and pickle per-scale coefficients.

    For each netCDF MCS file the five cloud-top temperature lag fields
    (tc_lag0..3, tc_lagx) are NaN-filled with -40, transformed with
    util.waveletTP against precipitation, and the coefficient planes at
    the scales in `ext` are collected into `wave`, which is pickled.
    """

    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')

    # wavelet scales as strings so they double as dict keys
    arr = np.array([
        10, 11, 11, 12, 13, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 27,
        28, 30, 32, 34, 36, 38, 40, 42, 45, 48, 50, 53, 57, 60
    ],
                   dtype=str)

    ext = np.array([10, 15, 20, 30, 60], dtype=str)  # scales actually stored
    rrange = list(range(arr.size))
    scales = np.in1d(arr, ext)
    rpos = np.array(rrange)[scales]  # positions of the stored scales in arr

    print(rpos)

    # per-file scalar summaries and the full original fields
    wave = {}
    wave['torig0_min'] = []
    wave['torig1_min'] = []
    wave['torig2_min'] = []
    wave['torig3_min'] = []
    wave['torigx_min'] = []
    wave['porig_max'] = []

    wave['torig0'] = []
    wave['torig1'] = []
    wave['torig2'] = []
    wave['torig3'] = []
    wave['torigx'] = []
    wave['porig'] = []

    # one sub-dict per stored scale with wavelet planes and their maxima
    for a in ext:
        wave[a] = {}

        wave[a]['twavelet0'] = []
        wave[a]['twavelet1'] = []
        wave[a]['twavelet2'] = []
        wave[a]['twavelet3'] = []
        wave[a]['twaveletx'] = []
        wave[a]['pwavelet'] = []

        wave[a]['twavelet0_max'] = []
        wave[a]['twavelet1_max'] = []
        wave[a]['twavelet2_max'] = []
        wave[a]['twavelet3_max'] = []
        wave[a]['twaveletx_max'] = []
        wave[a]['pwavelet_max'] = []

    for f in files:
        print('Doing file: ' + f)
        dic = xr.open_dataset(f)

        outt0 = np.array(dic['tc_lag0'].values.copy())
        outt1 = np.array(dic['tc_lag1'].values.copy())
        outt2 = np.array(dic['tc_lag2'].values.copy())
        outt3 = np.array(dic['tc_lag3'].values.copy())
        outtx = np.array(dic['tc_lagx'].values.copy())

        outp = np.array(dic['p'].values.copy())
        # tiny negative so NaN rain never counts as > 0 below
        outp[np.isnan(outp)] = -10**-5
        wave['porig_max'].append(np.percentile(outp[outp > 1], 99))
        wave['porig'].append(outp)

        looparr = [outt0, outt1, outt2, outt3, outtx]
        strarr = ['0', '1', '2', '3', 'x']

        for outt, strr in zip(looparr, strarr):

            # NOTE(review): outt is stored by reference and mutated below,
            # so the 'torig*' entries end up NaN-filled/masked too --
            # behaviour kept exactly as in the original.
            wave['torig' + strr].append(outt)

            outt[np.isnan(outt)] = -40

            wav = util.waveletTP(outt, outp, 5)

            outt[np.where(dic['pmask'].values == 0)] = -40

            # BUG FIX: the original read `outt0` here for every lag, so the
            # minima of lags 1/2/3/x were taken from lag 0's (already
            # modified) field instead of the current lag's field.
            wave['torig' + strr + '_min'].append(
                np.percentile(outt[outt < -40], 1))

            for pos in rpos:

                print(arr[pos])
                tt = np.array(wav['t'][pos, :, :])

                # the precip planes are the same for every lag; store once
                if strr == '0':
                    pp = np.array(wav['p'][pos, :, :])
                    wave[arr[pos]]['pwavelet'].append(pp)
                    wave[arr[pos]]['pwavelet_max'].append(
                        np.percentile(pp[pp > 0], 99))

                # zero out pixels outside the precipitation mask
                tt[np.where(dic['pmask'].values == 0)] = 0
                wave[arr[pos]]['twavelet' + strr].append(tt)
                wave[arr[pos]]['twavelet' + strr + '_max'].append(
                    np.percentile(tt[tt > 0], 99))

    # with-statement closes the pickle handle (original leaked it)
    with open('/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_perc.p',
              'wb') as fh:
        pkl.dump(wave, fh)

    print('Saved!')
Example #19
0
def perSys():
    """Collect per-system MCS statistics in parallel and pickle them.

    Locates every ``.nc`` file under the ``tthresh`` directory, runs
    ``file_loop`` on each file with a 5-worker process pool, gathers the
    returned per-system metrics into a dict of lists and dumps the dict to
    a pickle.  ``file_loop`` returns ``None`` for rejected systems; indexing
    ``None`` raises ``TypeError``, which is caught to skip those entries.
    """
    pool = multiprocessing.Pool(processes=5)
    tthresh = '-10'  # cloud-top temperature threshold tag used in both paths
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA350_4-8N_14W-10E_'+tthresh+'/')
    print('Nb files', len(files))
    mdic = defaultdict(list)
    res = pool.map(file_loop, files)  # pool.map blocks until all files are done
    pool.close()

    for v in res:
        try:
            # per-system scalars (or whole arrays): one entry per system
            mdic['tmin'].append(v[0])
            mdic['pmax'].append(v[1])
            mdic['area'].append(v[2])
            mdic['ao60'].append(v[3])
            mdic['tmean'].append(v[4])
            mdic['pperc'].extend(v[5])  # per-pixel sequences: flattened across systems
            mdic['clat'].append(v[6])
            mdic['po30'].append(v[7])
            mdic['isfin'].append(v[8])
            mdic['t'].append(v[9])
            mdic['lon30'].extend(v[10])
            mdic['lat30'].extend(v[11])
            mdic['lonisfin'].extend(v[12])
            mdic['latisfin'].extend(v[13])
            mdic['hour'].append(v[14])
            mdic['month'].append(v[15])
            mdic['latmin'].append(v[16])
            mdic['latmax'].append(v[17])
            mdic['isnz'].append(v[18])
            mdic['clon'].append(v[19])
            mdic['p'].append(v[20])
            mdic['pc'].append(v[21])
            mdic['year'].append(v[22])
        except TypeError:
            # file_loop returned None for this file -> skip it
            continue

    # 'with' guarantees the pickle file handle is closed (the original
    # open(...) without close leaked the handle).
    with open('/users/global/cornkle/data/CLOVER/saves/bulk_'+tthresh+'_zeroRain.p',
              'wb') as pfile:
        pkl.dump(mdic, pfile)
Example #20
0
    def __init__(self, msg_folder, y1=y1, y2=y2, months=None):
        """Set up an MFG reader rooted at *msg_folder*.

        Collects every ``<yyyy><mm>*`` directory under
        ``msg_folder/mfg_raw_binary`` for the requested years/months, sorts
        them naturally, and stores the per-slot ``tir.gra`` paths together
        with the native and stitched lat/lon grids.

        msg_folder: root directory holding ``lon.npz``, ``lon_stitch.npz``
            and the ``mfg_raw_binary`` tree.
        y1, y2: first and last year, inclusive; defaults come from the
            module-level constants of the same name.
        months: iterable of month numbers; ``None`` means all 12 months; a
            single-element iterable selects just that month.
        """
        yrange = range(y1, y2 + 1)  # 1998, 2014
        if months is None:
            mrange = range(1, 13)
        elif len(months) > 1:
            mrange = months
        else:
            mrange = range(months[0], months[0] + 1)

        try:
            lpath = uarr.locate('lon.npz', msg_folder, exclude=None)
            spath = uarr.locate('lon_stitch.npz', msg_folder, exclude=None)
        except Exception:
            print('Not a directory or no msg lat/lon found')
            return

        mpath = os.path.join(msg_folder, 'mfg_raw_binary')

        # os.path.isdir() returns a bool and never raises, so the original
        # try/except around it could never fire; test the result instead.
        if not os.path.isdir(mpath):
            print('No mfg_raw_binary')
            quit()

        rfiles = []
        for yr, mo in itertools.product(
                yrange, mrange):  # rain_f4 files only available for 6 to 10
            # one directory per <year><month> slot, e.g. .../199806*
            # (glob returns [] for no match; it does not raise)
            rfiles.extend(
                glob.glob(mpath + os.sep + str(yr) + str(mo).zfill(2) + '*'))

        rfiles.sort(key=ul.natural_keys)

        msg_latlon = np.load(lpath[0])
        mlon = msg_latlon['lon']
        mlat = msg_latlon['lat']

        msg_latlon_stitch = np.load(spath[0])
        slon = msg_latlon_stitch['lon']
        slat = msg_latlon_stitch['lat']

        # native grid
        self.lat = mlat
        self.lon = mlon
        self.nx = mlon.shape[1]
        self.ny = mlon.shape[0]

        # stitched grid
        self.stitch_lat = slat
        self.stitch_lon = slon
        self.stitch_nx = slon.shape[1]
        self.stitch_ny = slon.shape[0]

        years = []
        outfiles = []
        for r in rfiles:
            # directory name starts with the 4-digit year
            years.append(os.path.basename(r)[0:4])
            outfiles.append(r + os.sep + 'tir.gra')

        self.years = years
        self.root = msg_folder
        self.fpath = outfiles
Example #21
0
def readMCS_getWavelet_label():

    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')

    wave = {}

    strarr = ['0', '1', '2', '3', 'x']

    wave['porig'] = []
    wave['pw'] = []
    wave['tw0'] = []
    # wave['scales']=[]

    for st in strarr:
        wave['torig' + st] = []
        wave['tw' + st + '_max'] = []  # wavelet value at max point
        wave['pw' + st + '_max'] = []  # max p wavelet in radius
        wave['p' + st + '_max'] = []  # max p  in radius
        wave['p' + st + '_mean'] = []  # mean p in radius
        wave['t' + st + '_mean'] = []  # t mean in radius
        wave['t' + st + '_min'] = []  # t min in radius
        wave['pw' + st + '_mean'] = []  # mean p in radius
        wave['tw' + st + '_mean'] = []  # t mean in radius
        wave['scales' + st] = []
        wave['pnb' + st] = []

    cntmax = 0
    cntin = 0

    for f in files:
        print('Doing file: ' + f)
        dic = xr.open_dataset(f)

        #   if (dic['time.hour'].values<15) or (dic['time.hour'].values>21):
        # print('smaller')
        #      continue

        outp = np.array(dic['p'].values.copy())
        wave['porig'].append(outp)

        for strr in strarr:

            outt = np.array(dic['tc_lag' + strr].values.copy())
            outp[np.isnan(outp)] = -10**-5
            wave['torig' + strr].append(outt)

            outt[np.isnan(outt)] = 150
            outt[outt > -40] = 150
            grad = np.gradient(outt)
            outt[outt > -40] = -55
            o2 = outt.copy()
            nok = np.where(abs(grad[0]) > 80)
            d = 2
            i = nok[0]
            j = nok[1]

            for ii, jj in zip(i, j):
                kernel = o2[ii - d:ii + d + 1, jj - d:jj + d + 1]
                #  if not kernel.any():
                #   continue
                #   else:
                o2[ii - d:ii + d + 1,
                   jj - d:jj + d + 1] = ndimage.gaussian_filter(kernel,
                                                                3,
                                                                mode='nearest')

            wav = util.waveletTP_localMax(o2, outp, 5)
            o2[np.where(dic['pmask'].values == 0)] = np.nan

            if strr == '0':
                outp[np.where(dic['pmask'].values == 0)] = np.nan
                wave['pw'].append(wav['p'])
                wave['tw' + strr].append(wav['t'])
                #print(wav['scales'])
                cntmax = cntmax + len(wav['z'])

            xs = []
            ys = []
            for ss in [2, 6, 12]:
                maxoutt = (outt == ndimage.minimum_filter(outt,
                                                          ss,
                                                          mode='constant',
                                                          cval=np.amax(outt) +
                                                          1))
                maxoutt = maxoutt.astype(int)
                ypks, xpks = np.where((maxoutt == 1) & (outt < -55))
                ys.extend(ypks)
                xs.extend(xpks)

            radius = [2, 6, 12] * 5

            z = wav['z']
            y = wav['y']
            x = wav['x']

            for i in range(len(z)):

                zz = z[i]
                xx = x[i]
                yy = y[i]

                if dic['pmask'][
                        yy,
                        xx] == 0:  # if maximum falls in region where no TRMM exists, continue
                    continue

                sc = wav['scales'][zz]

                if strr == '0':
                    cntin = cntin + 1

                iscale = (np.ceil(wav['scales'][zz] / 2. / 5.)).astype(int)

                tw = wav['t'][zz, :, :].copy()
                pw = wav['p'][zz, :, :].copy()  #copy??

                tw[np.isnan(tw)] = 0
                pw[np.isnan(pw)] = 0
                tw[np.where(dic['pmask'].values == 0)] = np.nan
                pw[np.where(dic['pmask'].values == 0)] = np.nan

                #                pw[np.isnan(pw)]=1000
                #
                #                ax=plt.axes(projection=ccrs.PlateCarree())
                #                plt.contour(dic['lon'], dic['lat'], pw, levels=np.arange(500,1001,100), transform=ccrs.PlateCarree())
                #                plt.show()

                twmax = tw[yy, xx]
                print(twmax)

                #Find all indices within the local circle of radius iscale...
                # ... Then average over those indices
                xloc1 = np.arange(xx - iscale, xx + iscale + 1)
                yloc1 = np.arange(yy - iscale, yy + iscale + 1)
                xloc, yloc = np.meshgrid(xloc1, yloc1)
                distloc = ((xloc - xx)**2 + (yloc - yy)**2)**.5

                indloc = (distloc <= iscale).nonzero()
                ycirc = indloc[0] - iscale + yy
                xcirc = indloc[1] - iscale + xx

                #  print('pwshape',pw.shape[0], pw.shape[1] )
                #  print('twshape',tw.shape[0], tw.shape[1] )

                noky = np.where(
                    ycirc >= pw.shape[0])  # if the circle is off the edge
                if noky[0].size > 0:
                    ycirc = np.delete(ycirc, noky)
                    xcirc = np.delete(xcirc, noky)

                nokx = np.where(xcirc >= pw.shape[1])
                if nokx[0].size > 0:
                    ycirc = np.delete(ycirc, nokx)
                    xcirc = np.delete(xcirc, nokx)

                tmean = np.nanmean(dic['tc_lag' + strr].values[ycirc, xcirc])
                pmean = np.nanmean(dic['p'].values[ycirc, xcirc])
                twmean = np.nanmean(tw[ycirc, xcirc])
                pwmean = np.nanmean(pw[ycirc, xcirc])
                pmax = np.nanmax(outp[ycirc, xcirc])
                pwmax = np.nanmax(pw[ycirc, xcirc])
                tmin = np.nanmin(outt[ycirc, xcirc])
                pnb = ycirc.size

                wave['tw' + strr + '_max'].append(twmax)
                wave['pw' + strr + '_max'].append(pwmax)
                wave['tw' + strr + '_mean'].append(twmean)
                wave['pw' + strr + '_mean'].append(pwmean)
                wave['p' + strr + '_max'].append(pmax)
                wave['p' + strr + '_mean'].append(pmean)
                wave['t' + strr + '_mean'].append(tmean)
                wave['t' + strr + '_min'].append(tmin)
                wave['scales' + strr].append(sc)
                wave['pnb' + strr].append(pnb)

                #just append all the variables into a dictionary now!

    for k in wave:
        if isinstance(wave[k][0], np.ndarray):
            continue
        print(k)
        wave[k] = np.array(wave[k])

    pkl.dump(
        wave,
        open(
            '/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_label.p',
            'wb'))

    print('Saved!')
    print('Found ' + str(cntmax) + ' maxima in ' + str(len(files)) +
          ' systems.')
    print(str(cntin) + ' maxima coincided with TRMM')
Example #22
0
    def __init__(self, trmm_folder, yrange=YRANGE, mrange=MRANGE, hod=HOD, area=None):
        """Index TRMM rain-swath files that pass rain/overlap thresholds.

        Walks ``trmm_folder/<yyyy>/<mm>`` for ``_rain_f4.gra`` files, keeps
        swaths whose mean overpass hour is in *hod*, which contain enough
        rainy pixels, and (if *area* is given) which sufficiently overlap
        and rain inside the lon/lat box ``(lonW, lonE, latS, latN)``.

        Sets ``self.fpaths`` (kept file paths) and ``self.dates``
        (pandas Series of overpass datetimes).
        """
        import datetime  # local import: top-of-file import block not guaranteed

        min_rain_swath = 200  # min nb of rainy pixels in the whole swath
        min_rain_box = 200    # min nb of rainy pixels inside *area*
        min_tpixel = 2500     # min nb of swath pixels overlapping *area*
        rain_thresh = 0.1     # mm/h; pixels above this count as raining

        if not os.path.isdir(trmm_folder):
            print('Not a directory')
            quit()

        fdic = {'fpath': [], 'date': []}
        rfiles = []

        for yr, mo in itertools.product(yrange, mrange):  # rain_f4 files only available for 6 to 10
            tpath = os.path.join(trmm_folder, str(yr), str(mo).zfill(2))
            try:
                files = uarr.locate('_rain_f4.gra', tpath)
            except OSError:
                continue
            rfiles.extend(files)

        rfiles.sort(key=ul.natural_keys)

        if not rfiles:
            print('No trmm files found')
            return

        for eachfile in rfiles:
            rain_str = eachfile.replace('_rain_f4', '')
            time_str = eachfile.replace('_rain_f4', '_time')

            # Mean overpass time (seconds of day) -> hour-of-day filter.
            tsec = np.fromfile(time_str, dtype=np.float32)
            t = ut.sec_to_time(tsec.mean())
            if t.hour not in hod:
                continue

            rr = np.fromfile(rain_str, dtype=np.int16)
            x = 49  # trmm swath is always 49 pixels wide
            single = int(rr.size / 4)  # record holds lon, lat, rainrate, flag
            lons = rr[0:single]
            lats = rr[single:2 * single]
            rainrs = rr[2 * single:3 * single]
            y = int(lons.size / x)
            # int16 storage: lon/lat scaled by 100, rain rate by 10
            lont = np.resize(lons, (y, x)) / 100.
            latt = np.resize(lats, (y, x)) / 100.
            rain = np.resize(rainrs, (y, x)) / 10.

            if np.sum(rain > rain_thresh) < min_rain_swath:  # minimum TRMM rainfall > 0.1 in swath
                continue
            if area:
                box = np.where((lont > area[0]) & (lont < area[1]) & (latt > area[2]) & (latt < area[3]))

                if not box[0].any():
                    continue
                if len(box[0]) < min_tpixel:  # minimum pixel overlap with TRMM and box (50000km2)
                    continue
                if np.sum(rain[box] > rain_thresh) < min_rain_box:  # minimum rainfall in defined box
                    continue

            fdic['fpath'].append(rain_str)
            # pd.datetime was deprecated in pandas 0.25 and removed in 1.0;
            # build the datetime directly from the yyyymmdd in the filename.
            fdic['date'].append(datetime.datetime(
                int(rain_str[-20:-16]), int(rain_str[-16:-14]),
                int(rain_str[-14:-12]), t.hour, t.minute, 0))

        self.fpaths = fdic['fpath']
        self.dates = pd.Series(fdic['date'])
        self.__area = area
Example #23
0
def readMCS_getWavelet_tresh():
    """Per-scale wavelet percentile statistics with a per-system T threshold.

    For every MCS netCDF file, the five lagged cloud-top fields
    (``tc_lag0``..``tc_lag3``, ``tc_lagx``) are filled with the scene's
    lag-0 30th percentile wherever they are NaN or warmer than
    ``min(T) + 15``, run through ``util.waveletTP``, and the 99th-percentile
    wavelet power per selected scale (plus raw-field percentiles) is
    collected and pickled to ``MCS_wavelet_allyears_perc_thresh.p``.

    The original block repeated every step five times, once per lag; this
    version iterates over the lags instead (behavior unchanged).
    """
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')

    # Wavelet scales (km) produced by util.waveletTP, as strings so they can
    # double as dictionary keys.
    arr = np.array([
        10, 11, 11, 12, 13, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 27,
        28, 30, 32, 34, 36, 38, 40, 42, 45, 48, 50, 53, 57, 60
    ],
                   dtype=str)

    ext = np.array([10, 15, 20, 30, 60], dtype=str)  # scales to keep
    rpos = np.array(range(arr.size))[np.in1d(arr, ext)]  # their indices

    print(rpos)

    lags = ['0', '1', '2', '3', 'x']

    wave = {}
    wave['porig_max'] = []
    wave['porig'] = []
    for l in lags:
        wave['torig' + l + '_min'] = []
        wave['torig' + l] = []

    for a in ext:
        wave[a] = {'pwavelet': [], 'pwavelet_max': []}
        for l in lags:
            wave[a]['twavelet' + l] = []
            wave[a]['twavelet' + l + '_max'] = []

    for f in files:
        print('Doing file: ' + f)
        dic = xr.open_dataset(f)

        outts = [np.array(dic['tc_lag' + l].values.copy()) for l in lags]

        # Per-system fill value (30th percentile of lag-0) and warm cut-off
        # (coldest lag-0 pixel + 15 K), both taken before any filling.
        mmeans = np.percentile(outts[0][np.isfinite(outts[0])], 30)
        print(mmeans)
        thresh = np.nanmin(outts[0]) + 15

        outp = np.array(dic['p'].values.copy())
        outp[np.isnan(outp)] = -10**-5

        for outt in outts:
            outt[np.isnan(outt)] = mmeans
            outt[outt > thresh] = mmeans

        # Skip scenes where any lag collapsed entirely to the fill value,
        # or where lagx has no pixels colder than the fill value.
        if any(np.mean(outt) == mmeans for outt in outts):
            continue
        if not outts[-1][outts[-1] < mmeans].any():
            continue

        print('Wavelet start')

        wavs = [util.waveletTP(outt, outp, 5) for outt in outts]

        for l, outt in zip(lags, outts):
            wave['torig' + l + '_min'].append(
                np.percentile(outt[outt < mmeans], 1))
            wave['torig' + l].append(outt)
        wave['porig_max'].append(np.percentile(outp[outp > 1], 99))
        wave['porig'].append(outp)

        for pos in rpos:

            print(arr[pos])
            nop = np.where(dic['pmask'].values == 0)

            for l, wav in zip(lags, wavs):
                tt = np.array(wav['t'][pos, :, :])
                # zero out wavelet power where no TRMM rain estimate exists
                tt[nop] = 0
                wave[arr[pos]]['twavelet' + l].append(tt)
                wave[arr[pos]]['twavelet' + l + '_max'].append(
                    np.percentile(tt[tt > 0], 99))

            # P wavelet is taken from the lag-2 transform (as originally).
            pp = np.array(wavs[2]['p'][pos, :, :])
            wave[arr[pos]]['pwavelet'].append(pp)
            wave[arr[pos]]['pwavelet_max'].append(
                np.percentile(pp[pp > 0], 99))

    # 'with' guarantees the pickle file handle is closed.
    with open(
            '/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_perc_thresh.p',
            'wb') as pfile:
        pkl.dump(wave, pfile)

    print('Saved!')
Example #24
0
def readMCS_getWavelet_label():
    """Locate wavelet power maxima in MCS scenes and sample T/P around them.

    For every MCS netCDF file and every temperature lag, the cloud-top field
    is cleaned (steep cloud-edge gradients smoothed with a Gaussian kernel),
    passed through ``util.waveletTP_localMax``, and each detected wavelet
    maximum that overlaps the TRMM swath is summarised by means/extrema of
    T, P and wavelet power over a circle matching the maximum's scale.  The
    result is pickled to ``MCS_wavelet_allyears_label.p``.
    """
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')

    lags = ['0', '1', '2', '3', 'x']

    wave = {'porig': [], 'pw': [], 'tw0': []}
    for lag in lags:
        wave['torig' + lag] = []          # raw T field per system
        wave['tw' + lag + '_max'] = []    # T wavelet power at the maximum
        wave['pw' + lag + '_max'] = []    # max P wavelet power in circle
        wave['p' + lag + '_max'] = []     # max rainfall in circle
        wave['p' + lag + '_mean'] = []    # mean rainfall in circle
        wave['t' + lag + '_mean'] = []    # mean T in circle
        wave['t' + lag + '_min'] = []     # min T in circle
        wave['pw' + lag + '_mean'] = []   # mean P wavelet power in circle
        wave['tw' + lag + '_mean'] = []   # mean T wavelet power in circle
        wave['scales' + lag] = []
        wave['pnb' + lag] = []

    n_maxima = 0   # all wavelet maxima found (lag 0)
    n_matched = 0  # maxima coinciding with valid TRMM pixels (lag 0)

    for fpath in files:
        print('Doing file: ' + fpath)
        ds = xr.open_dataset(fpath)

        pfield = np.array(ds['p'].values.copy())
        wave['porig'].append(pfield)

        for lag in lags:

            tfield = np.array(ds['tc_lag' + lag].values.copy())
            pfield[np.isnan(pfield)] = -10**-5
            wave['torig' + lag].append(tfield)

            # Flag warm/undefined pixels, locate steep cloud edges from the
            # y-gradient, then flatten the warm background to -55.
            tfield[np.isnan(tfield)] = 150
            tfield[tfield > -40] = 150
            tgrad = np.gradient(tfield)
            tfield[tfield > -40] = -55
            tsmooth = tfield.copy()
            steep = np.where(abs(tgrad[0]) > 80)
            pad = 2

            # Gaussian-smooth a small window around every steep-edge pixel.
            for row, col in zip(steep[0], steep[1]):
                window = tsmooth[row - pad:row + pad + 1,
                                 col - pad:col + pad + 1]
                tsmooth[row - pad:row + pad + 1,
                        col - pad:col + pad + 1] = ndimage.gaussian_filter(
                            window, 3, mode='nearest')

            wdic = util.waveletTP_localMax(tsmooth, pfield, 5)
            tsmooth[np.where(ds['pmask'].values == 0)] = np.nan

            # Full wavelet stacks are stored for the lag-0 field only.
            if lag == '0':
                pfield[np.where(ds['pmask'].values == 0)] = np.nan
                wave['pw'].append(wdic['p'])
                wave['tw' + lag].append(wdic['t'])
                n_maxima = n_maxima + len(wdic['z'])

            # NOTE(review): px_x/px_y and radius below are computed but never
            # used afterwards; kept for strict behavioral parity.
            px_x = []
            px_y = []
            for size in [2, 6, 12]:
                ismin = (tfield == ndimage.minimum_filter(
                    tfield, size, mode='constant', cval=np.amax(tfield) + 1))
                ismin = ismin.astype(int)
                ypks, xpks = np.where((ismin == 1) & (tfield < -55))
                px_y.extend(ypks)
                px_x.extend(xpks)

            radius = [2, 6, 12] * 5

            zarr = wdic['z']
            yarr = wdic['y']
            xarr = wdic['x']

            for k in range(len(zarr)):

                kz = zarr[k]
                kx = xarr[k]
                ky = yarr[k]

                # Skip maxima that fall where no TRMM estimate exists.
                if ds['pmask'][ky, kx] == 0:
                    continue

                scale_km = wdic['scales'][kz]

                if lag == '0':
                    n_matched = n_matched + 1

                # Circle radius in pixels (5 km grid; scale is a diameter).
                rad = (np.ceil(wdic['scales'][kz] / 2. / 5.)).astype(int)

                twav = wdic['t'][kz, :, :].copy()
                pwav = wdic['p'][kz, :, :].copy()

                twav[np.isnan(twav)] = 0
                pwav[np.isnan(pwav)] = 0
                twav[np.where(ds['pmask'].values == 0)] = np.nan
                pwav[np.where(ds['pmask'].values == 0)] = np.nan

                tw_at_max = twav[ky, kx]
                print(tw_at_max)

                # All indices inside the circle of radius `rad` around (ky, kx).
                gx, gy = np.meshgrid(np.arange(kx - rad, kx + rad + 1),
                                     np.arange(ky - rad, ky + rad + 1))
                dist = ((gx - kx)**2 + (gy - ky)**2)**.5
                inside = (dist <= rad).nonzero()
                circ_y = inside[0] - rad + ky
                circ_x = inside[1] - rad + kx

                # Clip the circle where it runs off the bottom/right edges.
                off_y = np.where(circ_y >= pwav.shape[0])
                if off_y[0].size > 0:
                    circ_y = np.delete(circ_y, off_y)
                    circ_x = np.delete(circ_x, off_y)

                off_x = np.where(circ_x >= pwav.shape[1])
                if off_x[0].size > 0:
                    circ_y = np.delete(circ_y, off_x)
                    circ_x = np.delete(circ_x, off_x)

                # Circle statistics: mean T/P come from the unmodified
                # dataset fields; max P and min T from the filled copies.
                wave['tw' + lag + '_max'].append(tw_at_max)
                wave['pw' + lag + '_max'].append(
                    np.nanmax(pwav[circ_y, circ_x]))
                wave['tw' + lag + '_mean'].append(
                    np.nanmean(twav[circ_y, circ_x]))
                wave['pw' + lag + '_mean'].append(
                    np.nanmean(pwav[circ_y, circ_x]))
                wave['p' + lag + '_max'].append(
                    np.nanmax(pfield[circ_y, circ_x]))
                wave['p' + lag + '_mean'].append(
                    np.nanmean(ds['p'].values[circ_y, circ_x]))
                wave['t' + lag + '_mean'].append(
                    np.nanmean(ds['tc_lag' + lag].values[circ_y, circ_x]))
                wave['t' + lag + '_min'].append(
                    np.nanmin(tfield[circ_y, circ_x]))
                wave['scales' + lag].append(scale_km)
                wave['pnb' + lag].append(circ_y.size)

    # Convert per-maximum scalar lists to arrays; leave 2D-field lists alone.
    for key in wave:
        if isinstance(wave[key][0], np.ndarray):
            continue
        print(key)
        wave[key] = np.array(wave[key])

    pkl.dump(wave, open('/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_label.p', 'wb'))

    print('Saved!')
    print('Found ' + str(n_maxima) + ' maxima in ' + str(len(files)) + ' systems.')
    print(str(n_matched) + ' maxima coincided with TRMM')
Example #25
0
def perSys():
    """Collect per-system CP4 MCS statistics in parallel and pickle them.

    Runs ``file_loop`` over every ``.nc`` file of the CP4 Sahel MCS set with
    a 5-worker process pool, gathers the returned metrics into a dict of
    lists and pickles the dict.  ``file_loop`` returns ``None`` for rejected
    systems; indexing ``None`` raises ``TypeError``, which is caught to skip
    those entries.
    """
    pool = multiprocessing.Pool(processes=5)
    tthresh = '-50'  # cloud-top temperature threshold tag used in the save path
    files = ua.locate(
        ".nc",
        '/users/global/cornkle/data/CP4/CLOVER/MCS_-50_1000km2_JA_sahel')
    print('Nb files', len(files))
    mdic = defaultdict(list)
    res = pool.map(file_loop, files)  # blocks until every file is processed
    pool.close()

    for v in res:
        try:
            # per-system scalars (or whole arrays): one entry per system
            mdic['tmin'].append(v[0])
            mdic['pmax'].append(v[1])
            mdic['area'].append(v[2])
            mdic['ao60'].append(v[3])
            mdic['tmean'].append(v[4])
            mdic['pperc'].extend(v[5])  # per-pixel sequences: flattened
            mdic['clat'].append(v[6])
            mdic['po30'].append(v[7])
            mdic['isfin'].append(v[8])
            mdic['t'].append(v[9])
            mdic['lon30'].extend(v[10])
            mdic['lat30'].extend(v[11])
            mdic['lonisfin'].extend(v[12])
            mdic['latisfin'].extend(v[13])
            mdic['hour'].append(v[14])
            mdic['month'].append(v[15])
            mdic['latmin'].append(v[16])
            mdic['latmax'].append(v[17])
            mdic['isnz'].append(v[18])
            mdic['clon'].append(v[19])
            mdic['p'].append(v[20])
            mdic['year'].append(v[21])
            mdic['date'].append(v[22])
        except TypeError:
            # file_loop returned None for this file -> skip it
            continue

    # 'with' guarantees the pickle file handle is closed.
    with open(
            '/users/global/cornkle/data/CLOVER/saves/bulk_' + tthresh +
            '_zeroRain_gt1k_shear_CP4_JA_sahel.p', 'wb') as pfile:
        pkl.dump(mdic, pfile)
    # NOTE(review): the trailing colorbar/annotation/plt code that followed
    # the pickle dump referenced names never defined in this function
    # (f, mt, mp, dic) and raised NameError when reached — it belonged to a
    # different plotting routine and has been removed.


if __name__ == "__main__":
    # Script entry point: process a single example MCS file (index 238)
    # from the -40C West-Africa set.
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E_size_zR/')
    run(files[238])
Example #27
0
def readMCS_getWavelet():
    """Run the T/P wavelet transform over all per-MCS NetCDF files and pickle
    per-scale summary statistics.

    For every storm file the routine reads the cloud-top temperature field at
    several time lags ('0'..'3', 'x') plus rainfall 'p', feeds each lag through
    ``util.waveletTP`` and keeps, for a fixed subset of wavelet scales:

    - ``wave['torig<lag>']`` / ``wave['torig<lag>_min']``: temperature arrays
      and the 1st percentile of pixels colder than -40.
    - ``wave['porig']`` / ``wave['porig_max']``: rain arrays and the 99th
      percentile of rain > 1.
    - ``wave[<scale>]['twavelet<lag>'(/'_max')]`` and
      ``wave[<scale>]['pwavelet'(/'_max')]``: wavelet coefficient arrays and
      their 99th percentiles.

    The result is dumped to a pickle file; nothing is returned.
    """
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')

    lags = ['0', '1', '2', '3', 'x']

    # Wavelet scales produced by util.waveletTP, as strings so they can double
    # as dictionary keys.
    arr = np.array([10, 11, 11, 12, 13, 13, 14, 15, 16, 17, 18, 19, 20, 21,
                    22, 24, 25, 27, 28, 30, 32, 34, 36, 38, 40,
                    42, 45, 48, 50, 53, 57, 60], dtype=str)

    # Subset of scales that is actually kept.
    ext = np.array([10, 15, 20, 30, 60], dtype=str)
    scales = np.in1d(arr, ext)
    rpos = np.array(list(range(arr.size)))[scales]  # indices of kept scales

    print(rpos)

    # Accumulator dictionary, one list per (quantity, lag) and per scale.
    wave = {'porig': [], 'porig_max': []}
    for strr in lags:
        wave['torig' + strr] = []
        wave['torig' + strr + '_min'] = []

    for a in ext:
        wave[a] = {'pwavelet': [], 'pwavelet_max': []}
        for strr in lags:
            wave[a]['twavelet' + strr] = []
            wave[a]['twavelet' + strr + '_max'] = []

    for f in files:
        print('Doing file: ' + f)
        dic = xr.open_dataset(f)

        outt0 = np.array(dic['tc_lag0'].values.copy())
        outt1 = np.array(dic['tc_lag1'].values.copy())
        outt2 = np.array(dic['tc_lag2'].values.copy())
        outt3 = np.array(dic['tc_lag3'].values.copy())
        outtx = np.array(dic['tc_lagx'].values.copy())

        outp = np.array(dic['p'].values.copy())
        outp[np.isnan(outp)] = -10**-5  # tiny negative fill so NaNs never count as rain
        wave['porig_max'].append(np.percentile(outp[outp > 1], 99))
        wave['porig'].append(outp)

        looparr = [outt0, outt1, outt2, outt3, outtx]

        for outt, strr in zip(looparr, lags):

            wave['torig' + strr].append(outt)
            # NOTE(review): the array appended above is subsequently mutated
            # in place (NaN fill and pmask masking below), so the stored
            # "orig" arrays carry those edits too — confirm this is intended.

            outt[np.isnan(outt)] = -40

            wav = util.waveletTP(outt, outp, 5)

            outt[np.where(dic['pmask'].values == 0)] = -40

            # BUGFIX: previously read np.percentile(outt0[outt0 < -40], 1),
            # i.e. always lag 0 regardless of which lag this iteration
            # handles; use the current lag's array instead.
            wave['torig' + strr + '_min'].append(
                np.percentile(outt[outt < -40], 1))

            for pos in rpos:

                print(arr[pos])
                tt = np.array(wav['t'][pos, :, :])

                if strr == '0':
                    # Rain wavelet is presumably identical across lags (outp
                    # does not change inside the lag loop), so store it once.
                    pp = np.array(wav['p'][pos, :, :])
                    wave[arr[pos]]['pwavelet'].append(pp)
                    wave[arr[pos]]['pwavelet_max'].append(
                        np.percentile(pp[pp > 0], 99))

                tt[np.where(dic['pmask'].values == 0)] = 0
                wave[arr[pos]]['twavelet' + strr].append(tt)
                wave[arr[pos]]['twavelet' + strr + '_max'].append(
                    np.percentile(tt[tt > 0], 99))

    # Use a context manager so the pickle file handle is always closed.
    with open('/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_perc.p', 'wb') as fh:
        pkl.dump(wave, fh)

    print('Saved!')
Example #28
0
    def __init__(self,
                 trmm_folder,
                 yrange=YRANGE,
                 mrange=MRANGE,
                 hod=HOD,
                 area=None):
        """Index TRMM version-7 swath files that pass rainfall/coverage checks.

        Walks ``trmm_folder/<year>/<month>`` for ``.7.gra`` swaths, derives the
        mean observation time from the companion ``.7_time.`` file, and keeps a
        swath only if it (a) falls within the requested hours of day, (b) has
        enough rainy pixels overall, and (c) when ``area`` is given, overlaps
        that lon/lat box with enough pixels and enough rain inside it.

        Parameters
        ----------
        trmm_folder : str
            Root directory of the TRMM archive.
        yrange, mrange : iterables of int
            Years and months to scan (module-level defaults).
        hod : iterable of int
            Hours of day (0-23) to accept.
        area : sequence or None
            Optional [lonmin, lonmax, latmin, latmax] selection box.

        Populates ``self.fpaths`` (kept file paths) and ``self.dates``
        (pandas Series of datetimes).
        """
        from datetime import datetime  # local import; pd.datetime alias was removed in pandas 1.0

        # Quality thresholds for keeping a swath.
        min_rain_swath = 200   # min rainy pixels anywhere in the swath
        min_rain_box = 200     # min rainy pixels inside `area`
        min_tpixel = 2500      # min pixel overlap between swath and `area`
        rain_thresh = 0.1      # mm/h; above this a pixel counts as rainy

        if not os.path.isdir(trmm_folder):
            print('Not a directory')
            quit()  # NOTE(review): hard exit inside a constructor; consider raising instead

        fdic = {'fpath': [], 'date': []}
        rfiles = []

        # Collect all version-7 .gra swath files for the requested years/months.
        for yr, mo in itertools.product(
                yrange, mrange):  # rain_f4 files only available for 6 to 10

            tpath = os.path.join(trmm_folder, str(yr), str(mo).zfill(2))
            try:
                files = uarr.locate('.7.gra', tpath)
            except OSError:
                continue

            rfiles.extend(files)

        rfiles.sort(key=ul.natural_keys)

        if not rfiles:
            print('No trmm files found')
            return

        for eachfile in rfiles:
            rain_str = eachfile
            time_str = eachfile.replace('.7.', '.7_time.')
            try:
                # Companion file: per-pixel observation time in seconds of day.
                rr = np.fromfile(time_str, dtype=np.float32)
            except FileNotFoundError:
                print(time_str + ' missing, continue')
                continue

            secmean = rr.mean()
            try:
                t = ut.sec_to_time(secmean)
            except ValueError:
                print('ValueError sec to time')
                continue
            if t.hour not in hod:  # keep only the requested hours of day
                continue

            rr = np.fromfile(rain_str, dtype=np.int16)
            x = 49  # trmm swath is always 49 wide
            nb = rr.size
            single = int(nb / 4)  # four stacked variables: lon, lat, rainrate, flag

            lons = rr[0:single]
            lats = rr[single:2 * single]
            rainrs = rr[2 * single:3 * single]
            y = int(lons.size / x)
            lons = np.resize(lons, (y, x))
            lats = np.resize(lats, (y, x))
            rainrs = np.resize(rainrs, (y, x))
            # Values are stored as scaled integers: degrees*100 and mm/h*10.
            lont = lons / 100.
            latt = lats / 100.
            rain = rainrs / 10.

            # Minimum TRMM rainfall > rain_thresh anywhere in the swath.
            if np.sum(rain > rain_thresh) < min_rain_swath:
                continue
            if area:
                box = np.where((lont > area[0]) & (lont < area[1])
                               & (latt > area[2]) & (latt < area[3]))

                if not box[0].any():
                    continue
                # Minimum pixel overlap with TRMM and box (50000km2).
                if len(box[0]) < min_tpixel:
                    continue
                # Minimum rainfall inside the defined box.
                if np.sum(rain[box] > rain_thresh) < min_rain_box:
                    continue

            fdic['fpath'].append(rain_str)

            # BUGFIX: pd.datetime was a deprecated alias removed in pandas 1.0;
            # datetime.datetime produces the very same objects.
            fdic['date'].append(
                datetime(int(rain_str[-20:-16]), int(rain_str[-16:-14]),
                         int(rain_str[-14:-12]), t.hour, t.minute, 0))

        self.fpaths = fdic['fpath']
        self.dates = pd.Series(fdic['date'])
        self.__area = area
Example #29
0
import multiprocessing
import pdb
import pickle as pkl
from collections import OrderedDict

import numpy as np
import pandas as pd
import xarray as xr
from scipy.ndimage.measurements import label

from utils import u_arrays as ua

# Script: fraction of heavy-rain pixels (>= 30 mm/h) captured by the MCS
# dataset, relative to all TRMM pixels in the coastal band.
dic = pkl.load(
    open('/users/global/cornkle/C_paper/wavelet/saves/bulk_40big_zR.p', 'rb'))

p30 = np.array(dic['po30'])   # per-MCS count of pixels >= 30 mm/h
lat = np.array(dic['clat'])   # per-MCS centroid latitude
mcs_count = np.sum(p30[(lat >= 4) & (lat <= 7.5)])

files = ua.locate(".nc", '/users/global/cornkle/TRMMfiles')
cnt = 0
for f in files:
    print('Doing ', f)
    xa = xr.open_dataset(f)

    # BUGFIX: distinct names so the per-MCS `lat` above is not clobbered
    # by the per-file coordinates.
    flat = xa.lat.values
    flon = xa.lon.values
    arr = xa['p'].values
    # NOTE(review): upper latitude bound is 7.8 here but 7.5 for the MCS
    # selection above — confirm the mismatch is intentional.
    arr = arr[(flat >= 4) & (flat <= 7.8) & (flon >= -17) & (flon <= 20)]

    cnt += np.sum(arr >= 30)

# Guard against division by zero when no TRMM files/pixels were found.
if cnt:
    print('MCS frac', mcs_count / cnt)
else:
    print('No heavy-rain TRMM pixels found; cannot compute fraction')