def parseCellTab(tab):
    """Read a whitespace-separated cell table and attach a parsed timestamp.

    The file is read without a header row; its thirteen columns are named
    Year, Mday, Slot, Pixel, Area, Lat, Lon, Mincol, a, b, c, Temp, Tresh.

    Args:
        tab: Path or file-like object accepted by ``pandas.read_csv``.
            The table must contain at least one row, because the time of
            day is taken from its first ``Slot`` value.

    Returns:
        A new DataFrame with the columns Pixel, Area, Lat, Lon, Mincol,
        Temp, Tresh and a datetime column ``Date``.
    """
    # NOTE(review): the result of this call is discarded. It may be a leftover
    # or a deliberate strptime warm-up; confirm before removing it.
    dt.datetime.strptime('2004_0601_1315', '%Y_%m%d_%H%M')

    columns = ["Year", "Mday", "Slot", "Pixel", "Area", "Lat", "Lon",
               "Mincol", "a", "b", "c", "Temp", "Tresh"]
    # Raw string: '\s' inside a normal literal is an invalid escape sequence.
    # Mday is kept as text so a leading zero (e.g. '0601') is not lost.
    df = pd.read_csv(tab, sep=r'\s+', header=None,
                     converters={'Mday': str}, names=columns)

    # Slot * 30 * 60 turns the slot number into seconds; only the first row
    # is converted and its hour/minute is applied to every row of the table.
    sec = df["Slot"] * 30. * 60.
    t = ut.sec_to_time(sec[0])
    # Format the scalars directly so the text is always zero-padded 'HHMM',
    # independent of the dtype pandas chose for the Slot column.
    hhmm = "{:02}{:02}".format(t.hour, t.minute)

    # Explicit copy: the new column is added to an independent frame and
    # never to a slice of df.
    small = df.loc[:, ["Pixel", "Area", "Lat", "Lon", "Mincol", "Temp", "Tresh"]].copy()

    stamp = df["Year"].astype(str).str.cat(df["Mday"].astype(str), sep='_') + '_' + hhmm
    small["Date"] = pd.to_datetime(stamp, format='%Y_%m%d_%H%M')
    return small
def extract_TRMMfile(tpath, hod=HOD, yrange=YRANGE, mtresh=MTRESH):
    """Collect TRMM 2A25 rain files whose mean overpass time is wanted.

    For every year in ``yrange``, the months 6 to 9 and every day of the
    month, files matching
    ``<tpath><year>/<month>/2A25.<yyyymmdd>.*.7_rain_f4.gra`` are globbed.
    For each hit the companion ``*_time.gra`` file is read and its mean
    value is converted to a time of day with ``ut.sec_to_time``.

    Args:
        tpath: Root folder. It is concatenated directly with the year, so it
            is expected to end with a path separator.
        hod: Container of accepted hours; files whose mean hour is not in it
            are skipped.
        yrange: Iterable of years to search.
        mtresh: If truthy, files whose mean minute lies strictly between
            3 and 27 or strictly between 33 and 57 are skipped.

    Returns:
        dict with the keys ``'fpath'`` (list of file names with
        ``'_rain_f4'`` removed), ``'tmins'`` (list of the mean minute of
        each kept file) and ``'date'`` (the ``ut.date_list()`` object filled
        through its ``add`` method).
    """
    pattern = tpath + '{0:d}/{1:02d}/2A25.{0:d}{1:02d}{2:02d}.*.7_rain_f4.gra'

    fdic = {'fpath': [], 'tmins': [], 'date': ut.date_list()}

    # Days run from 1 to 31 inclusive so that 31 July and 31 August are
    # searched as well; dates that do not exist simply match no file.
    for yr, mo, dy in itertools.product(yrange, range(6, 10), range(1, 32)):
        for eachfile in glob.glob(pattern.format(yr, mo, dy)):
            rain_str = eachfile.replace('_rain_f4', '')
            time_str = eachfile.replace('_rain_f4', '_time')

            rr = np.fromfile(time_str, dtype=np.float32)  # seconds of day
            t = ut.sec_to_time(rr.mean())

            if t.hour not in hod:
                continue

            # Keep only overpasses close to a full or a half hour.
            if mtresh and (3 < t.minute < 27 or 33 < t.minute < 57):
                continue

            # Snap to the half hour when the minute is between 15 and 45,
            # otherwise to the full hour of t.hour.
            # NOTE(review): minutes shortly before the full hour (e.g. 58)
            # are mapped to minute 0 of the same hour, not the next one.
            minute = 30 if 15 < t.minute < 45 else 0

            fdic['fpath'].append(rain_str)
            fdic['date'].add(yr, mo, dy, t.hour, minute, 0)
            fdic['tmins'].append(t.minute)

    print(fdic['fpath'])
    return fdic
def parseCellTab(tab):
    """Read a whitespace-separated cell table and attach a parsed timestamp.

    The file is read without a header row; its thirteen columns are named
    Year, Mday, Slot, Pixel, Area, Lat, Lon, Mincol, a, b, c, Temp, Tresh.

    Args:
        tab: Path or file-like object accepted by ``pandas.read_csv``.
            The table must contain at least one row, because the time of
            day is taken from its first ``Slot`` value.

    Returns:
        A new DataFrame with the columns Pixel, Area, Lat, Lon, Mincol,
        Temp, Tresh and a datetime column ``Date``.
    """
    # NOTE(review): the result of this call is discarded. It may be a leftover
    # or a deliberate strptime warm-up; confirm before removing it.
    dt.datetime.strptime('2004_0601_1315', '%Y_%m%d_%H%M')

    columns = ["Year", "Mday", "Slot", "Pixel", "Area", "Lat", "Lon",
               "Mincol", "a", "b", "c", "Temp", "Tresh"]
    # Raw string: '\s' inside a normal literal is an invalid escape sequence.
    # Mday is kept as text so a leading zero (e.g. '0601') is not lost.
    df = pd.read_csv(tab, sep=r'\s+', header=None,
                     converters={'Mday': str}, names=columns)

    # Slot * 30 * 60 turns the slot number into seconds; only the first row
    # is converted and its hour/minute is applied to every row of the table.
    sec = df["Slot"] * 30. * 60.
    t = ut.sec_to_time(sec[0])
    # Format the scalars directly so the text is always zero-padded 'HHMM',
    # independent of the dtype pandas chose for the Slot column.
    hhmm = "{:02}{:02}".format(t.hour, t.minute)

    # Explicit copy: the new column is added to an independent frame and
    # never to a slice of df.
    small = df.loc[:, ["Pixel", "Area", "Lat", "Lon", "Mincol", "Temp", "Tresh"]].copy()

    stamp = df["Year"].astype(str).str.cat(df["Mday"].astype(str), sep='_') + '_' + hhmm
    small["Date"] = pd.to_datetime(stamp, format='%Y_%m%d_%H%M')
    return small
def __init__(self, trmm_folder, yrange=YRANGE, mrange=MRANGE, hod=HOD, area=None):
    """Scan ``trmm_folder`` for TRMM swath files and keep the rainy ones.

    Files ending in ``.7.gra`` are located under
    ``<trmm_folder>/<year>/<month>`` for every combination of ``yrange``
    and ``mrange``. A file is kept when its mean overpass hour is in
    ``hod``, the swath has at least 200 pixels with rain above 0.1 and,
    if ``area`` is given, the box overlaps at least 2500 swath pixels of
    which at least 200 have rain above 0.1.

    Args:
        trmm_folder: Root directory of the TRMM archive. If it is not a
            directory a message is printed and ``quit()`` is called.
        yrange: Iterable of years to scan.
        mrange: Iterable of months to scan.
        hod: Container of accepted hours of day.
        area: Optional sequence ``(lon_min, lon_max, lat_min, lat_max)``;
            comparisons against the box are strict.

    Sets:
        self.fpaths: list of the kept rain file paths.
        self.dates: pandas Series with one timestamp per kept file.
        self.__area: the ``area`` argument.
        All three are also set (empty) when no file is found.
    """
    min_rain_swath = 200
    min_rain_box = 200
    min_tpixel = 2500
    rain_thresh = 0.1
    swath_width = 49  # trmm swath is always 49 wide

    if not os.path.isdir(trmm_folder):
        print('Not a directory')
        # NOTE(review): quit() ends the whole interpreter from inside a
        # constructor; kept because callers may rely on it.
        quit()

    rfiles = []
    for yr, mo in itertools.product(yrange, mrange):  # rain_f4 files only available for 6 to 10
        tpath = os.path.join(trmm_folder, str(yr), str(mo).zfill(2))
        try:
            files = uarr.locate('.7.gra', tpath)
        except OSError:
            continue
        rfiles.extend(files)

    rfiles.sort(key=ul.natural_keys)

    if not rfiles:
        # Fall through and leave the loop empty, so the instance still gets
        # its attributes and is never left half-initialised.
        print('No trmm files found')

    fpaths = []
    dates = []

    for rain_str in rfiles:
        time_str = rain_str.replace('.7.', '.7_time.')
        try:
            rr = np.fromfile(time_str, dtype=np.float32)  # seconds of day
        except FileNotFoundError:
            print(time_str + ' missing, continue')
            continue

        secmean = rr.mean()
        try:
            t = ut.sec_to_time(secmean)
        except ValueError:
            print('ValueError sec to time')
            continue

        if t.hour not in hod:
            continue

        # The int16 file holds four equally sized variables in sequence:
        # lon, lat, rainrate, flag. Only the first three are used.
        rr = np.fromfile(rain_str, dtype=np.int16)
        single = rr.size // 4
        lons = rr[0:single]
        lats = rr[single:2 * single]
        rainrs = rr[2 * single:3 * single]

        rows = lons.size // swath_width
        shape = (rows, swath_width)
        lont = np.resize(lons, shape) / 100.
        latt = np.resize(lats, shape) / 100.
        rain = np.resize(rainrs, shape) / 10.

        # minimum TRMM rainfall > 0.1 in swath
        if np.count_nonzero(rain > rain_thresh) < min_rain_swath:
            continue

        if area:
            inside = ((lont > area[0]) & (lont < area[1]) &
                      (latt > area[2]) & (latt < area[3]))
            # minimum pixel overlap with TRMM and box (50000km2);
            # an empty selection is rejected by the same test.
            if np.count_nonzero(inside) < min_tpixel:
                continue
            # minimum rainfall in defined box
            if np.count_nonzero(rain[inside] > rain_thresh) < min_rain_box:
                continue

        fpaths.append(rain_str)
        # The date is sliced from fixed positions counted from the end of the
        # path; assumes names end like 'YYYYMMDD.<5 chars>.7.gra' - TODO confirm.
        # pd.Timestamp replaces the deprecated pd.datetime alias, which is no
        # longer available in current pandas releases.
        dates.append(pd.Timestamp(year=int(rain_str[-20:-16]),
                                  month=int(rain_str[-16:-14]),
                                  day=int(rain_str[-14:-12]),
                                  hour=t.hour, minute=t.minute, second=0))

    self.fpaths = fpaths
    self.dates = pd.Series(dates)
    self.__area = area
def __init__(self, trmm_folder, yrange=YRANGE, mrange=MRANGE, hod=HOD, area=None):
    """Scan ``trmm_folder`` for TRMM ``_rain_f4`` files and keep the rainy swaths.

    Files ending in ``_rain_f4.gra`` are located under
    ``<trmm_folder>/<year>/<month>`` for every combination of ``yrange``
    and ``mrange``. For each hit the swath file (same name without
    ``_rain_f4``) and the time file (``_rain_f4`` replaced by ``_time``)
    are read. A swath is kept when its mean overpass hour is in ``hod``,
    it has at least 200 pixels with rain above 0.1 and, if ``area`` is
    given, the box overlaps at least 2500 swath pixels of which at least
    200 have rain above 0.1.

    Args:
        trmm_folder: Root directory of the TRMM archive. If it is not a
            directory a message is printed and ``quit()`` is called.
        yrange: Iterable of years to scan.
        mrange: Iterable of months to scan.
        hod: Container of accepted hours of day.
        area: Optional sequence ``(lon_min, lon_max, lat_min, lat_max)``;
            comparisons against the box are strict.

    Sets:
        self.fpaths: list of the kept swath file paths.
        self.dates: pandas Series with one timestamp per kept file.
        self.__area: the ``area`` argument.
        All three are also set (empty) when no file is found.
    """
    min_rain_swath = 200
    min_rain_box = 200
    min_tpixel = 2500
    rain_thresh = 0.1
    swath_width = 49  # trmm swath is always 49 wide

    if not os.path.isdir(trmm_folder):
        print('Not a directory')
        # NOTE(review): quit() ends the whole interpreter from inside a
        # constructor; kept because callers may rely on it.
        quit()

    rfiles = []
    for yr, mo in itertools.product(yrange, mrange):  # rain_f4 files only available for 6 to 10
        tpath = os.path.join(trmm_folder, str(yr), str(mo).zfill(2))
        try:
            files = uarr.locate('_rain_f4.gra', tpath)
        except OSError:
            continue
        rfiles.extend(files)

    rfiles.sort(key=ul.natural_keys)

    if not rfiles:
        # Fall through and leave the loop empty, so the instance still gets
        # its attributes and is never left half-initialised.
        print('No trmm files found')

    fpaths = []
    dates = []

    for eachfile in rfiles:
        rain_str = eachfile.replace('_rain_f4', '')
        time_str = eachfile.replace('_rain_f4', '_time')

        # NOTE(review): a missing time file or a failing time conversion
        # raises here and aborts the whole scan.
        rr = np.fromfile(time_str, dtype=np.float32)  # seconds of day
        t = ut.sec_to_time(rr.mean())

        if t.hour not in hod:
            continue

        # The int16 file holds four equally sized variables in sequence:
        # lon, lat, rainrate, flag. Only the first three are used.
        rr = np.fromfile(rain_str, dtype=np.int16)
        single = rr.size // 4
        lons = rr[0:single]
        lats = rr[single:2 * single]
        rainrs = rr[2 * single:3 * single]

        rows = lons.size // swath_width
        shape = (rows, swath_width)
        lont = np.resize(lons, shape) / 100.
        latt = np.resize(lats, shape) / 100.
        rain = np.resize(rainrs, shape) / 10.

        # minimum TRMM rainfall > 0.1 in swath
        if np.count_nonzero(rain > rain_thresh) < min_rain_swath:
            continue

        if area:
            inside = ((lont > area[0]) & (lont < area[1]) &
                      (latt > area[2]) & (latt < area[3]))
            # minimum pixel overlap with TRMM and box (50000km2);
            # an empty selection is rejected by the same test.
            if np.count_nonzero(inside) < min_tpixel:
                continue
            # minimum rainfall in defined box
            if np.count_nonzero(rain[inside] > rain_thresh) < min_rain_box:
                continue

        fpaths.append(rain_str)
        # The date is sliced from fixed positions counted from the end of the
        # path; assumes names end like 'YYYYMMDD.<5 chars>.7.gra' - TODO confirm.
        # pd.Timestamp replaces the deprecated pd.datetime alias, which is no
        # longer available in current pandas releases.
        dates.append(pd.Timestamp(year=int(rain_str[-20:-16]),
                                  month=int(rain_str[-16:-14]),
                                  day=int(rain_str[-14:-12]),
                                  hour=t.hour, minute=t.minute, second=0))

    self.fpaths = fpaths
    self.dates = pd.Series(dates)
    self.__area = area