def test_getData_area_cut_date(self):
    """Exercise area filtering, latitude cuts and date-based reads of ``trmm.ReadWA``."""
    # both test files have enough rainfall in the swath at the 0.5 mm
    # threshold for the box given here
    area = [-15, 5, 15, 20]
    lon_lo, lat_lo, lon_hi, lat_hi = area
    min_rain_swath = 2000  # defined alongside the other thresholds, not asserted below
    min_rain_box = 500
    min_tpixel = 2500
    rain_thresh = 0.5

    reader = trmm.ReadWA(test_dir, area=area)
    swath = reader.get_data(reader.fpaths[1])
    lons = swath['lon'].values
    lats = swath['lat'].values
    box = np.where(
        (lons > lon_lo) & (lons < lon_hi) & (lats > lat_lo) & (lats < lat_hi)
    )

    # files are properly filtered according to the rain/box overlap thresholds
    assert len(box[0]) > min_tpixel
    assert np.sum(swath['p'].values[box] > rain_thresh) > min_rain_box

    # a reader without an area: the cut restricts the first file to 8..10 ...
    reader = trmm.ReadWA(test_dir)
    swath = reader.get_data(reader.fpaths[0], cut=[8, 10])
    first_column = swath['lat'].values[:, 0]
    assert first_column.max() <= 10
    assert first_column.min() >= 8

    # ... while the uncut second file reaches beyond both bounds
    swath = reader.get_data(reader.fpaths[1])
    first_column = swath['lat'].values[:, 0]
    assert first_column.max() >= 10
    assert first_column.min() <= 8

    # reading by date and reading by file index give the same arrays,
    # and the cut behaves the same way
    by_date = reader.get_ddata(2007, 8, 16, 19, 16, cut=[8, 10])
    first_column = by_date['lat'].values[:, 0]
    assert first_column.max() <= 10
    assert first_column.min() >= 8

    by_index = reader.get_data(reader.fpaths[0], cut=[8, 10])
    assert_array_equal(by_index['p'].values, by_date['p'].values)
    assert_array_equal(by_index['flags'].values, by_date['flags'].values)
def test_trmmReadWA(self):
    """``ReadWA`` lists both test swaths and exits for a year range without data."""
    reader = trmm.ReadWA(test_dir)
    expected_paths = [
        test_dir + '/2007/08/2A25.20070816.55562.7.gra',
        test_dir + '/2007/08/2A25.20070817.55577.7.gra',
    ]
    assert reader.fpaths == expected_paths

    # constructing the reader for this year range must raise SystemExit
    with self.assertRaises(SystemExit):
        trmm.ReadWA(test_dir, yrange=range(1999, 2000))
def test_write_netcdf(self):
    """``get_data`` raises ``OSError`` for a bad netCDF path and accepts a valid one."""
    reader = trmm.ReadWA(test_dir)

    # a target directory that does not exist must surface as OSError
    with self.assertRaises(OSError):
        reader.get_data(reader.fpaths[1], netcdf_path='/does/not/exist/test.nc')

    # the same read with a target inside test_write must not raise
    reader.get_data(reader.fpaths[1], netcdf_path=test_write + '/test.nc')
def test_ll_to_MSG_TRMM(self):
    """Every TRMM pixel of the cut swath must get its own (x, y) pair from ``ll_toMSG``."""
    # NOTE(review): this local deliberately shadows any module-level test_dir
    test_dir = '/users/global/cornkle/data/pythonWorkspace/proj_CEH/eod/tests/test_files/trmm'
    reader = trmm.ReadWA(test_dir)
    swath = reader.get_data(reader.fpaths[0], cut=[3, 4])

    lon = swath['lon'].values
    lat = swath['lat'].values
    msg_xy = tm_utils.ll_toMSG(lon, lat)

    # as many unique pair values as there are input pixels
    pair_ids = ua.unique_of_pair(msg_xy['x'], msg_xy['y'])
    assert np.unique(pair_ids).size == lon.size
def netcdf():
    """Interpolate TRMM swaths onto the LSTA test-file grid and save each as netCDF.

    For every date provided by ``trmm.ReadWA`` (restricted to ``YRANGE`` and
    the hard-coded ``box``) the swath is cut to the box latitudes, skipped if
    fewer than 100 pixels have rainfall > 0, linearly interpolated with
    ``griddata`` onto the grid read from ``constants.LSTA_TESTFILE``, cleaned
    of steep-gradient edges and written to
    ``/users/global/cornkle/TRMMfiles/<date>.nc`` (an existing file is
    replaced).  Nothing is returned; progress is printed to stdout.
    """
    # ``pd.datetime`` was only an alias of ``datetime.datetime`` and no longer
    # exists in current pandas, so the stdlib class is used directly.
    from datetime import datetime as _datetime

    trmm_folder = "/users/global/cornkle/data/OBS/TRMM/trmm_swaths_WA/"
    box = [-11, 9, 11, 21]  # W, S, E, N

    # target grid comes from the LSTA test file
    lsta = xr.open_dataset(constants.LSTA_TESTFILE)
    grid = lsta.salem.grid
    xi, yi = grid.ij_coordinates
    lon, lat = grid.ll_coordinates

    # griddata target points; identical for every swath, so built once
    inter = np.array((np.ravel(yi), np.ravel(xi))).T

    t = trmm.ReadWA(trmm_folder, yrange=YRANGE, area=[box[0], box[1], box[2], box[3]])

    cnt = 0
    # cycle through TRMM dates - only dates that have a certain number of
    # pixels in llbox are considered
    for _y, _m, _d, _h, _mi in zip(t.dates.y, t.dates.m, t.dates.d, t.dates.h, t.dates.mi):
        td = t.get_ddata(_y, _m, _d, _h, _mi, cut=[box[1], box[3]])

        # kept as a one-element list: it becomes the 'time' coordinate below
        date = [_datetime(_y, _m, _d, _h, _mi)]
        print(date)

        # ensure minimum trmm rainfall in area: at least 100 pixel with rainfall
        if np.count_nonzero(td['p'].values > 0) < 100:
            print('Kickout: TRMM min pixel = 100')
            continue

        # Transform lons, lats to grid
        xt, yt = grid.transform(td['lon'].values.flatten(),
                                td['lat'].values.flatten(),
                                crs=salem.wgs84)

        # Convert for griddata input
        tpoints = np.array((yt, xt)).T

        # Interpolate using delaunay triangularization
        dummyt = griddata(tpoints, td['p'].values.flatten(), inter, method='linear')
        outt = dummyt.reshape((grid.ny, grid.nx))

        # Remove interpolation artefacts at the swath edge: NaNs are set to
        # a large negative sentinel so the gradient is steep where valid data
        # borders missing data, then those steep pixels are masked. Repeated
        # five times.
        for _ in range(5):
            boole = np.isnan(outt)
            outt[boole] = -1000
            grad = np.gradient(outt)
            outt[boole] = np.nan
            outt[abs(grad[1]) > 300] = np.nan
            outt[abs(grad[0]) > 300] = np.nan

        # negative rainfall after interpolation: report and skip the swath
        # (the message used to sit after ``continue`` and was never printed)
        if np.nanmin(outt) < 0:
            print('Makes no sense!')
            continue

        da = xr.DataArray(outt[None, ...],
                          coords={'time': date,
                                  'lat': lat[:, 0],
                                  'lon': lon[0, :]},
                          dims=['time', 'lat', 'lon'])
        ds = xr.Dataset({'p': da})

        savefile = '/users/global/cornkle/TRMMfiles/' + date[0].strftime('%Y-%m-%d_%H:%M:%S') + '.nc'
        # remove a stale file first; a missing file is not an error
        try:
            os.remove(savefile)
        except OSError:
            pass
        ds.to_netcdf(path=savefile, mode='w')
        print('Saved ' + savefile)
        cnt = cnt + 1

    print('Saved ' + str(cnt) + ' TRMM swaths as netcdf.')
def tm_overlap_blobs():
    """Collect rainfall/temperature statistics for big MSG blobs overlapped by TRMM.

    For every TRMM date from ``trmm.ReadWA`` the MSG time given by
    ``tm_utils.minute_delta(_mi, 30)`` is selected. Table entries for that
    time with ``Area >= 25000`` are matched to their MSG blob. For blobs whose
    lat/lon bounding box lies inside the TRMM swath and which share at least
    500 pixels with it, per-blob statistics are appended to two dictionaries
    of lists: ``mdic`` (all pixels with rain > 0.1) and ``mdic_f`` (pixels
    flagged convective by ``tm_utils.getTRMMconv``).

    Both dictionaries are pickled as ``[mdic, mdic_f]`` to
    ``/users/global/cornkle/data/OBS/test/c_paper_rainfield.p``.  Nothing is
    returned; progress is printed to stdout.
    """
    trmm_folder = "/users/global/cornkle/data/OBS/TRMM/trmm_swaths_WA/"
    msg_folder = '/users/global/cornkle/data/OBS/meteosat_WA30'
    savefile = '/users/global/cornkle/data/OBS/test/c_paper_rainfield.p'

    tObj = trmm.ReadWA(trmm_folder, area=AREA, yrange=YRANGE)
    mObj = msg.ReadMsg(msg_folder)
    dates = tObj.dates

    mdic = defaultdict(list)    # statistics over all rain > 0.1 pixels
    mdic_f = defaultdict(list)  # statistics over convective pixels only

    # x/y pairs of the whole MSG grid as returned by ll_toMSG
    mll = tm_utils.ll_toMSG(mObj.lon, mObj.lat)

    cnt = 0
    datess = []

    # cycle through TRMM dates - only dates that have a certain number of
    # pixels in llbox are considered
    for _y, _m, _d, _h, _mi in zip(dates.y, dates.m, dates.d, dates.h, dates.mi):

        # set zero shift time for msg
        date = dt.datetime(_y, _m, _d, _h, _mi)
        dt0 = tm_utils.minute_delta(_mi, 30)
        ndate = date + dt.timedelta(minutes=int(dt0))
        print('TRMM', date, 'dt', dt0, 'MSG', ndate)
        print('TRMM', date, 'MSG', ndate)

        mObj.set_date(ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute)

        # NOTE(review): this only skips when BOTH paths are missing although
        # table and blob file are both read below - confirm 'or' is intended.
        if not (mObj.tpath or mObj.bpath):
            print('No table or blob file, continue')
            continue

        dff = mObj.get_table()
        dstring = ndate.strftime('%Y-%m-%d %H:%M:00')
        # ``Series.as_matrix()`` no longer exists in pandas >= 1.0
        if dstring not in dff['Date'].values:
            continue

        sel = dff.loc[dff['Date'] == dstring]
        big = sel.loc[sel['Area'] >= 25000]  # only mcs over 25.000km2
        print('big area', big['Area'].values)
        if big.empty:
            continue

        td = tObj.get_ddata(_y, _m, _d, _h, _mi, cut=[0, 22])
        try:
            td_missing = not td
        except ValueError:
            # array-likes refuse an ambiguous truth value; that still means
            # data came back, so carry on
            td_missing = False
        if td_missing:
            print('TRMM problem')
            continue

        md = mObj.get_data(llbox=AREA)
        md_blob = mObj.get_blob(llbox=AREA)
        blobs = md_blob.values

        blat = big['Lat'].values.tolist()
        blon = big['Lon'].values.tolist()
        barea = big['Area'].values.tolist()
        btemp = big['Temp'].values.tolist()

        # hoisted: identical for every blob of this date
        tlon = td['lon'].values
        tlat = td['lat'].values

        for lon, lat, bt, ba in zip(blon, blat, btemp, barea):

            mcs = tm_utils.ll_toMSG(lon, lat)
            point = np.where((mll['x'] == mcs['x']) & (mll['y'] == mcs['y']))

            # blob number at the table position of the MCS
            nb = blobs[point]
            if nb.size == 0:
                # position not on the MSG grid; indexing nb[0] would raise
                print('MCS position not found in MSG grid, skip')
                continue
            # if we find a 0 instead of a blob, continue
            if not nb[0]:
                continue

            isblob = np.where(blobs == nb)
            if isblob[0].size < 2500:
                print('Ooops blob too small? This should not happen')
                continue

            # lat lons of complete blob
            blats = md['lat'].values[isblob]
            blons = md['lon'].values[isblob]

            # msg indices of complete blob
            my = mll['y'][isblob]
            mx = mll['x'][isblob]

            blatmin, blatmax = blats.min(), blats.max()
            blonmin, blonmax = blons.min(), blons.max()

            # whole blob must be inside TRMM. Attention: This draws a rectangle.
            # There is still a chance that blob is not in TRMM. Checked later!
            if not (tlon.min() < blonmin and tlon.max() > blonmax):
                continue
            if not (tlat.min() < blatmin and tlat.max() > blatmax):
                continue

            ll_trmm = tm_utils.ll_toMSG(tlon, tlat)
            tx = ll_trmm['x']
            ty = ll_trmm['y']

            mpair = ua.unique_of_pair(mx, my)
            tpair = ua.unique_of_pair(tx, ty)
            # Do we need to do it that way?
            inter = np.in1d(tpair, mpair)  # returns false and true, whole grid
            # Attention: this leaves out meteosat cells where no closest TRMM
            # cell (since TRMM is coarser!)
            inter_rev = np.in1d(mpair, tpair.flat[inter])

            # have at least 500 pixels shared for MCS between TRMM and MSG
            if np.count_nonzero(inter) < 500:
                continue

            print(_y, _m, _d, _h, _mi)

            bprcp = td['p'].values.flat[inter]
            bflags = td['flags'].values.flat[inter]
            mtt = md['t'].values[isblob].flat[inter_rev]

            # we need same number of TRMM and MSG per plot to do the masking
            if not bprcp.size == mtt.size:
                print('Tprcp and MSGT not same, someting wrong!')
                continue

            if np.count_nonzero(bprcp) < 50:
                continue

            mask = np.array(tm_utils.getTRMMconv(bflags))    # filter for convective rain
            smask = np.array(tm_utils.getTRMMstrat(bflags))  # filter for stratiform rain

            wet = bprcp > 0.1
            nz_bprcp = np.sum(wet)
            tall = np.nanmean(mtt[np.isfinite(bprcp)])

            # remove all these zero rainfall from blob
            bprcpNZ = bprcp[wet]
            mttNZ = mtt[wet]
            flagsNZ = bflags[wet]
            maskNZ = tm_utils.getTRMMconv(flagsNZ)    # list of 0 and 1, flattened!
            smaskNZ = tm_utils.getTRMMstrat(flagsNZ)  # list of 0 and 1, flattened!

            n_conv_nz = sum(maskNZ)
            if n_conv_nz < 2:
                continue

            datess.append((_y, _m, _d, _h, _mi, ba,
                           tlon.min(), tlon.max(), tlat.min(), tlat.max(),
                           blonmin, blonmax, blatmin, blatmax))

            p_mean = np.nanmean(bprcpNZ)
            t_mean = np.nanmean(mttNZ)
            p_98 = np.percentile(bprcpNZ, 98)
            p_max = np.nanmax(bprcp)
            share_30 = float(np.sum(bprcpNZ > 30)) / float(bprcpNZ.size)

            mdic['p'].append(p_mean)        # prcp mean of every MCS (no zero)
            mdic['pp'].append(p_98)         # value of 98 percentile of MCS (no zero)
            mdic['rain'].append(bprcpNZ)    # whole rainfall field, no sum
            mdic['pmax'].append(p_max)      # maximum pcp in MCS
            mdic['pi'].append(share_30)     # share of > 30mmh pixel of > 0 pixel
            mdic['t'].append(t_mean)        # T where PCP > 0 and overlap
            mdic['tall'].append(tall)       # T where cloud and TRMM valid (incl 0 rain)
            mdic['hod'].append(_h)          # hour of day for image
            mdic['yr'].append(_y)           # year for image
            mdic['mon'].append(_m)          # month for image
            mdic['lat'].append(lat)
            mdic['lon'].append(lon)
            mdic['tpixel_nzero'].append(nz_bprcp)        # nb pixel of MCS for PCP > 0
            mdic['tpixel'].append(bprcp.size)            # nb pixel of MCS including 0
            mdic['tpixel_conv'].append(mask.sum())       # number convective pixel
            mdic['tpixel_strat'].append(smask.sum())     # number stratiform pixel
            # number zero pixel: total minus raining pixels (the previous
            # np.size(nz_bprcp) was the size of a scalar, i.e. always 1)
            mdic['tpixel_zero'].append(bprcp.size - nz_bprcp)
            mdic['twhole'].append(bt)
            mdic['area'].append(isblob[0].size)

            print('Passed flag filter')

            # statistics restricted to the convective, raining pixels
            conv = np.where(maskNZ)
            prcp_conv = bprcpNZ.flat[conv]
            pc = np.nanmean(prcp_conv)
            tc = np.nanmean(mttNZ.flat[conv])
            pic = float(np.greater(prcp_conv, 30.).sum()) / float(n_conv_nz)
            ppc = np.percentile(prcp_conv, 98)
            pmaxc = prcp_conv.max()

            mdic_f['pconv'].append(pc)
            mdic_f['piconv'].append(pic)
            mdic_f['ppconv'].append(ppc)
            mdic_f['pmaxconv'].append(pmaxc)
            mdic_f['tconv'].append(tc)
            mdic_f['tnfconv'].append(t_mean)
            mdic_f['hod'].append(_h)
            mdic_f['yr'].append(_y)
            mdic_f['mon'].append(_m)
            mdic_f['lat'].append(lat)
            mdic_f['lon'].append(lon)
            mdic_f['tpixel_convNZ'].append(n_conv_nz)
            mdic_f['tpixel_stratNZ'].append(sum(smaskNZ))

            cnt = cnt + 1
            print(cnt)

    myDicts = [mdic, mdic_f]
    for d in datess:
        print(d)

    # context manager so the pickle file is flushed and closed
    with open(savefile, 'wb') as fh:
        pkl.dump(myDicts, fh)
    # report the file that was actually written
    print('Saved ' + savefile + ' with ' + str(cnt) + ' MCSs')
def saveMCS():
    """Merge TRMM swaths with MSG cloud blobs on a 5 km grid and save one netCDF per blob.

    For every TRMM date the closest earlier MSG time slot is loaded, falling
    back to the next earlier slot if it is at most 15 minutes away.
    Temperatures >= -10 are set to 0 and the rest is labelled into blobs.
    Each blob with more than 39 pixels is cut out, TRMM rainfall/flags and
    MSG temperature are interpolated to a common grid from ``u_grid.make``,
    interpolation edges are removed and the result is written to
    ``/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E_zR/<date>_<label>.nc``
    (an existing file is replaced).  Nothing is returned; progress is printed.
    """
    trmm_folder = "/users/global/cornkle/data/OBS/TRMM/trmm_swaths_WA/"
    msg_folder = '/users/global/cornkle/data/OBS/meteosat_WA30'

    # (ll_lon, ll_lat, ur_lon, ur_lat) define initial TRMM box and scan for
    # swaths in that box
    t = trmm.ReadWA(trmm_folder, yrange=YRANGE, area=[-15, 4, 20, 25])
    m = msg.ReadMsg(msg_folder)

    cnt = 0

    # minute array to find closest MSG minute
    arr = np.array([15, 30, 45, 60, 0])

    # loop through TRMM dates - only dates that have a certain number of
    # pixels in llbox are considered
    for _y, _m, _d, _h, _mi in zip(t.dates.y, t.dates.m, t.dates.d, t.dates.h, t.dates.mi):

        tdic = t.get_ddata(_y, _m, _d, _h, _mi, cut=[3, 26])  # cut TRMM data at lower/upper lat

        # get value of closest minute; only negative offsets (earlier slots)
        dm = arr - _mi
        dm = dm[dm < 0]
        try:
            ind = (np.abs(dm)).argmin()
        except ValueError:
            # no earlier slot (argmin of an empty array)
            continue

        # set smallest lag time for msg
        date = dt.datetime(_y, _m, _d, _h, _mi)
        dt0 = dm[ind]
        ndate = date + dt.timedelta(minutes=int(dt0))
        m.set_date(ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute)

        swath_box = [tdic['lon'].values.min(), tdic['lat'].values.min(),
                     tdic['lon'].values.max(), tdic['lat'].values.max()]
        mdic = m.get_data(llbox=swath_box)

        # check whether date is completely missing or just 30mins interval exists
        if not mdic:
            # try second closest minute
            dm = np.delete(dm, np.argmin(np.abs(dm)), axis=0)
            try:
                dummy = np.min(np.abs(dm)) > 15
            except ValueError:
                # nothing left to try
                continue
            if dummy:
                print('Date missing')
                continue
            ind = (np.abs(dm)).argmin()
            dt0 = dm[ind]
            ndate = date + dt.timedelta(minutes=int(dt0))
            m.set_date(ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute)
            mdic = m.get_data(llbox=swath_box)
            if not mdic:
                print('Date missing')
                continue

        print('TRMM:', date, 'MSG:', ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute)

        lon1 = mdic['lon'].values  # MSG coords
        lat1 = mdic['lat'].values
        mdic['t'].values[mdic['t'].values >= -10] = 0  # T threshold -10 for clouds

        ### filter minimum cloud size
        labels, _ = label(mdic['t'].values)
        u, inv = np.unique(labels, return_inverse=True)
        n = np.bincount(inv)
        goodinds = u[n > 39]  # defines minimum MCS size e.g. 9x39 ~ 350km2
        print(goodinds)  # indices of clouds of "good size"

        if not goodinds.sum() > 0:
            continue

        for gi in goodinds:
            if gi == 0:  # index 0 is always background, ignore!
                continue

            inds = np.where(labels == gi)  # position of cloud

            # cut a box for every single blob (cloud) from msg - get min max
            # lat lon of the blob, cut upper lower from TRMM to match blob.
            # lon1/lat1 are already plain arrays (``.values`` taken above), so
            # they are indexed directly.
            latmax, latmin = lat1[inds].max(), lat1[inds].min()
            lonmax, lonmin = lon1[inds].max(), lon1[inds].min()
            mmeans = np.percentile(mdic['t'].values[inds], 90)

            # for each cloud, cut TRMM swath
            td = t.get_ddata(_y, _m, _d, _h, _mi, cut=[latmin - 1, latmax + 1])

            # cut cloud box in MSG
            ml0 = m.get_data(llbox=[lonmin - 1, latmin - 1, lonmax + 1, latmax + 1])
            if not ml0:
                continue

            # make salem grid
            grid = u_grid.make(ml0['lon'].values, ml0['lat'].values, 5000)  # 5km regular grid from lat/lon coords
            lon, lat = grid.ll_coordinates  # 5km grid lat/lon coordinates

            # interpolate TRMM and MSG to 5km common grid
            inter, mpoints = u_grid.griddata_input(ml0['lon'].values, ml0['lat'].values, grid)
            inter, tpoints = u_grid.griddata_input(td['lon'].values, td['lat'].values, grid)

            # Interpolate TRMM using delaunay triangularization
            try:
                dummyt = griddata(tpoints, td['p'].values.flatten(), inter, method='linear')
            except ValueError:
                continue
            outt = dummyt.reshape((grid.ny, grid.nx))

            if np.sum(np.isfinite(outt)) < 5:  # at least 5 valid pixel
                print('Kickout: TRMM min pixel < 5')
                continue

            # Interpolate TRMM flags USING NEAREST
            dummyf = griddata(tpoints, td['flags'].values.flatten(), inter, method='nearest')
            outf = dummyf.reshape((grid.ny, grid.nx))
            # builtin float: the ``np.float`` alias was removed from NumPy
            outf = outf.astype(float)
            outf[np.isnan(outt)] = np.nan

            ## remove artefact edges of interpolated TRMM: NaNs become a large
            ## negative sentinel so the gradient is steep at the data border,
            ## then steep pixels are masked in rainfall and flags (5 passes)
            for _ in range(5):
                boole = np.isnan(outt)
                outt[boole] = -1000
                grad = np.gradient(outt)
                outt[boole] = np.nan
                outt[abs(grad[1]) > 300] = np.nan
                outt[abs(grad[0]) > 300] = np.nan
                outf[abs(grad[1]) > 300] = np.nan
                outf[abs(grad[0]) > 300] = np.nan

            # get convective rainfall only
            outff = tm_utils.getTRMMconv(outf)  ## from TRMM flags, get positions of convective rain
            conv_pos = np.where(outff)
            outk = np.zeros_like(outt)
            outk[conv_pos] = outt[conv_pos]

            # Interpolate MSG using delaunay triangularization
            dummy = griddata(mpoints, ml0['t'].values.flatten(), inter, method='linear')
            dummy = dummy.reshape((grid.ny, grid.nx))
            outl = np.full_like(dummy, np.nan)
            xl, yl = grid.transform(lon1[inds], lat1[inds], crs=salem.wgs84, nearest=True, maskout=True)
            yl_in, xl_in = yl.compressed(), xl.compressed()
            outl[yl_in, xl_in] = dummy[yl_in, xl_in]

            # TODO #### SHIFTING WITH RESPECT TO MIN T / MAX P - search for
            # Pmax within 20km from Tmin, shift TRMM image

            tmask = np.isfinite(outt)
            mmask = np.isfinite(outl)
            mask2 = np.isfinite(outl[tmask])

            n_cloud = mmask.sum()    # grid cells covered by the cloud
            n_overlap = mask2.sum()  # cloud cells that also have TRMM data

            # last check for min area, crazy rainfall or crazy cloud size.
            # The rainfall test looks at finite pixels only: ``outt.max()`` is
            # NaN as soon as outt holds a NaN and then never exceeds 200.
            if (n_cloud * 25 < 350) or np.any(outt[tmask] > 200) or (n_cloud * 25 > 1500000):
                continue

            if n_overlap < 5:  # Check minimum overlap between TRMM swath and MSG cloud
                print('Kickout: TRMM MSG overlap less than 5pix of cloud area')
                continue

            print('Hit:', gi)

            da = xr.Dataset({'p': (['x', 'y'], outt),         # rainfall field
                             'pconv': (['x', 'y'], outk),     # convective rainfall
                             't_lag0': (['x', 'y'], dummy),   # full T image in cutout region
                             'tc_lag0': (['x', 'y'], outl),   # cloud area only
                             },
                            coords={'lon': (['x', 'y'], lon),
                                    'lat': (['x', 'y'], lat),
                                    'time': date})
            da.attrs['lag0'] = dt0                               # lag in minutes between TRMM / MSG
            da.attrs['meanT'] = np.mean(outl[mmask])             # cloud mean T
            da.attrs['T90perc'] = mmeans                         # cloud 90perc T
            da.attrs['meanT_cut'] = np.mean(outl[tmask][mask2])  # cloud mean T in TRMM region
            da.attrs['area'] = n_cloud                           # total cloud area
            da.attrs['area_cut'] = n_overlap                     # cloud area overlapping with TRMM

            savefile = '/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E_zR/' + date.strftime('%Y-%m-%d_%H:%M:%S') + '_' + str(gi) + '.nc'
            # remove a stale file first; a missing file is not an error
            try:
                os.remove(savefile)
            except OSError:
                pass
            da.to_netcdf(path=savefile, mode='w')
            da.close()  # release the dataset only after it has been written
            print('Saved ' + savefile)

            cnt = cnt + 1

    print('Saved ' + str(cnt) + ' TRMM/MSG merged MCSs as netcdf.')