Example #1
0
def parseCellTab(tab):
    """Read a whitespace-separated cell table and attach a timestamp column.

    The table has no header row; its 13 columns are named Year, Mday, Slot,
    Pixel, Area, Lat, Lon, Mincol, a, b, c, Temp and Tresh.  ``Mday`` is kept
    as text so that a leading zero (e.g. ``0601``) survives parsing.

    The time of day is derived from the ``Slot`` value of the FIRST row only
    (slot number times 30 minutes) and applied to every row, so the table
    must contain at least one row.

    Args:
        tab: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        DataFrame with the columns Pixel, Area, Lat, Lon, Mincol, Temp, Tresh
        and a datetime column ``Date`` built from Year, Mday and the slot
        time.
    """
    df = pd.read_csv(tab,
                     sep=r'\s+',  # raw string: '\s' is not a valid str escape
                     header=None,
                     converters={'Mday': str},
                     names=[
                         "Year", "Mday", "Slot", "Pixel", "Area", "Lat", "Lon",
                         "Mincol", "a", "b", "c", "Temp", "Tresh"
                     ])

    # Slot index -> seconds of day (30-minute slots); only row 0 is used.
    sec = df["Slot"] * 30. * 60.
    t = ut.sec_to_time(sec[0])
    hhmm = "{:02}{:02}".format(t.hour, t.minute)

    # Explicit copy so that adding "Date" never writes into a view of df.
    small = df.loc[:,
                   ["Pixel", "Area", "Lat", "Lon", "Mincol", "Temp",
                    "Tresh"]].copy()

    # Assemble 'YYYY_MMDD_HHMM' strings and parse them in one pass.
    stamp = df.Year.astype(str) + '_' + df.Mday.astype(str) + '_' + hhmm
    small["Date"] = pd.to_datetime(stamp, format='%Y_%m%d_%H%M')

    return small
Example #2
0
def extract_TRMMfile(tpath, hod=HOD, yrange=YRANGE, mtresh=MTRESH):
    """Find TRMM 2A25 rain files whose mean overpass time passes the filters.

    For every year in ``yrange``, the months June to September and every day
    of the month, files matching
    ``<tpath><year>/<month>/2A25.<yyyymmdd>.*.7_rain_f4.gra`` are looked up.
    The matching ``*_time`` file is read as float32 seconds of day and its
    mean is converted with ``ut.sec_to_time``.

    Args:
        tpath: Root path; it is concatenated directly with the pattern, so it
            has to end with a path separator.
        hod: Container of accepted hours of day.
        yrange: Iterable of integer years.
        mtresh: If truthy, keep only overpasses whose minute is within
            3 minutes of a full or half hour.

    Returns:
        dict with the keys ``'fpath'`` (file paths with ``_rain_f4`` removed),
        ``'tmins'`` (actual minute of the mean overpass time) and ``'date'``
        (the ``ut.date_list()`` object, with one entry added per kept file
        using the minute snapped to 0 or 30).
    """
    pattern = tpath + '{0:d}/{1:02d}/2A25.{0:d}{1:02d}{2:02d}.*.7_rain_f4.gra'

    fdic = {'fpath': [], 'tmins': [], 'date': ut.date_list()}

    # Days run up to 31 so that 31 July and 31 August are not skipped;
    # non-existent dates (e.g. 31 June) simply match no files.
    for yr, mo, dy in itertools.product(yrange, range(6, 10), range(1, 32)):
        # Sorted so the output order does not depend on directory listing order.
        for eachfile in sorted(glob.glob(pattern.format(yr, mo, dy))):

            rain_str = eachfile.replace('_rain_f4', '')
            time_str = eachfile.replace('_rain_f4', '_time')

            rr = np.fromfile(time_str, dtype=np.float32)  # seconds of day
            t = ut.sec_to_time(rr.mean())

            if t.hour not in hod:
                continue

            # test whether close to 30mins or full hour
            if mtresh and (3 < t.minute < 27 or 33 < t.minute < 57):
                continue

            # Snap to the half hour when in the middle of the hour, otherwise
            # to the full hour.
            # NOTE(review): minutes 45-59 are snapped to minute 0 of the SAME
            # hour rather than the next one - confirm this is intended.
            minute = 30 if 15 < t.minute < 45 else 0

            fdic['fpath'].append(rain_str)
            fdic['date'].add(yr, mo, dy, t.hour, minute, 0)
            fdic['tmins'].append(t.minute)
    print(fdic['fpath'])
    return fdic
Example #3
0
def extract_TRMMfile(tpath, hod=HOD, yrange=YRANGE, mtresh=MTRESH):
    """Find TRMM 2A25 rain files whose mean overpass time passes the filters.

    For every year in ``yrange``, the months June to September and every day
    of the month, files matching
    ``<tpath><year>/<month>/2A25.<yyyymmdd>.*.7_rain_f4.gra`` are looked up.
    The matching ``*_time`` file is read as float32 seconds of day and its
    mean is converted with ``ut.sec_to_time``.

    Args:
        tpath: Root path; it is concatenated directly with the pattern, so it
            has to end with a path separator.
        hod: Container of accepted hours of day.
        yrange: Iterable of integer years.
        mtresh: If truthy, keep only overpasses whose minute is within
            3 minutes of a full or half hour.

    Returns:
        dict with the keys ``'fpath'`` (file paths with ``_rain_f4`` removed),
        ``'tmins'`` (actual minute of the mean overpass time) and ``'date'``
        (the ``ut.date_list()`` object, with one entry added per kept file
        using the minute snapped to 0 or 30).
    """
    pattern = tpath + '{0:d}/{1:02d}/2A25.{0:d}{1:02d}{2:02d}.*.7_rain_f4.gra'

    fdic = {'fpath': [], 'tmins': [], 'date': ut.date_list()}

    # Days run up to 31 so that 31 July and 31 August are not skipped;
    # non-existent dates (e.g. 31 June) simply match no files.
    for yr, mo, dy in itertools.product(yrange, range(6, 10), range(1, 32)):
        # Sorted so the output order does not depend on directory listing order.
        for eachfile in sorted(glob.glob(pattern.format(yr, mo, dy))):

            rain_str = eachfile.replace('_rain_f4', '')
            time_str = eachfile.replace('_rain_f4', '_time')

            rr = np.fromfile(time_str, dtype=np.float32)  # seconds of day
            t = ut.sec_to_time(rr.mean())

            if t.hour not in hod:
                continue

            # test whether close to 30mins or full hour
            if mtresh and (3 < t.minute < 27 or 33 < t.minute < 57):
                continue

            # Snap to the half hour when in the middle of the hour, otherwise
            # to the full hour.
            # NOTE(review): minutes 45-59 are snapped to minute 0 of the SAME
            # hour rather than the next one - confirm this is intended.
            minute = 30 if 15 < t.minute < 45 else 0

            fdic['fpath'].append(rain_str)
            fdic['date'].add(yr, mo, dy, t.hour, minute, 0)
            fdic['tmins'].append(t.minute)
    print(fdic['fpath'])
    return fdic
Example #4
0
def parseCellTab(tab):
    """Read a whitespace-separated cell table and attach a timestamp column.

    The table has no header row; its 13 columns are named Year, Mday, Slot,
    Pixel, Area, Lat, Lon, Mincol, a, b, c, Temp and Tresh.  ``Mday`` is kept
    as text so that a leading zero (e.g. ``0601``) survives parsing.

    The time of day is derived from the ``Slot`` value of the FIRST row only
    (slot number times 30 minutes) and applied to every row, so the table
    must contain at least one row.

    Args:
        tab: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        DataFrame with the columns Pixel, Area, Lat, Lon, Mincol, Temp, Tresh
        and a datetime column ``Date`` built from Year, Mday and the slot
        time.
    """
    columns = ["Year", "Mday", "Slot", "Pixel", "Area", "Lat", "Lon",
               "Mincol", "a", "b", "c", "Temp", "Tresh"]
    # Raw string for the separator: '\s' is not a valid str escape sequence.
    df = pd.read_csv(tab, sep=r'\s+', header=None,
                     converters={'Mday': str}, names=columns)

    # Slot index -> seconds of day (30-minute slots); only row 0 is used.
    sec = df["Slot"] * 30. * 60.
    t = ut.sec_to_time(sec[0])
    hhmm = "{:02}{:02}".format(t.hour, t.minute)

    # Explicit copy so that adding "Date" never writes into a view of df.
    kept = ["Pixel", "Area", "Lat", "Lon", "Mincol", "Temp", "Tresh"]
    small = df.loc[:, kept].copy()

    # Assemble 'YYYY_MMDD_HHMM' strings and parse them in one pass.
    stamp = df.Year.astype(str) + '_' + df.Mday.astype(str) + '_' + hhmm
    small["Date"] = pd.to_datetime(stamp, format='%Y_%m%d_%H%M')

    return small
Example #5
0
    def __init__(self,
                 trmm_folder,
                 yrange=YRANGE,
                 mrange=MRANGE,
                 hod=HOD,
                 area=None):
        """Collect the TRMM swath files that pass the time and rain filters.

        Files ending in ``.7.gra`` are searched under
        ``<trmm_folder>/<year>/<two-digit month>``.  A file is kept when

        * its companion ``.7_time.`` file exists and the hour of the mean
          scan time is in ``hod``,
        * at least 200 swath pixels have a rain value above 0.1, and
        * if ``area`` is given, at least 2500 swath pixels lie inside the
          box and at least 200 of them have a rain value above 0.1.

        The results are stored in ``self.fpaths`` (list of file paths) and
        ``self.dates`` (``pd.Series`` of timestamps; the date is taken from
        the file name, hour and minute from the mean scan time).  If nothing
        is found a message is printed and both are left empty.

        Args:
            trmm_folder: Root directory of the TRMM files.
            yrange: Iterable of years to scan.
            mrange: Iterable of months to scan.
            hod: Container of accepted hours of day.
            area: Optional box ``(lon_min, lon_max, lat_min, lat_max)``;
                the comparisons are strict.

        Raises:
            NotADirectoryError: If ``trmm_folder`` is not a directory.
        """
        min_rain_swath = 200
        min_rain_box = 200
        min_tpixel = 2500
        rain_thresh = 0.1

        # Raise instead of calling quit(): a constructor must not terminate
        # the interpreter of whoever is using the class.
        if not os.path.isdir(trmm_folder):
            raise NotADirectoryError('Not a directory: ' + str(trmm_folder))

        rfiles = []
        for yr, mo in itertools.product(
                yrange, mrange):  # rain_f4 files only available for 6 to 10

            tpath = os.path.join(trmm_folder, str(yr), str(mo).zfill(2))
            try:
                files = uarr.locate('.7.gra', tpath)
            except OSError:
                continue

            rfiles.extend(files)

        rfiles.sort(key=ul.natural_keys)

        # No early return here: falling through keeps the instance fully
        # initialised (empty fpaths / dates) instead of leaving it without
        # its attributes.
        if not rfiles:
            print('No trmm files found')

        fpaths = []
        dates = []

        for rain_str in rfiles:
            time_str = rain_str.replace('.7.', '.7_time.')
            try:
                rr = np.fromfile(time_str, dtype=np.float32)  # seconds of day
            except FileNotFoundError:
                print(time_str + ' missing, continue')
                continue

            secmean = rr.mean()
            try:
                t = ut.sec_to_time(secmean)
            except ValueError:
                print('ValueError sec to time')
                continue
            if t.hour not in hod:
                continue

            raw = np.fromfile(rain_str, dtype=np.int16)
            x = 49  # trmm swath is always 49 wide
            single = raw.size // 4  # variables lon lat rainrate flag
            y = single // x

            # Stored as scaled integers: lon/lat * 100, rain * 10.
            lont = np.resize(raw[0:single], (y, x)) / 100.
            latt = np.resize(raw[single:2 * single], (y, x)) / 100.
            rain = np.resize(raw[2 * single:3 * single], (y, x)) / 10.

            # minimum TRMM rainfall > 0.1 in swath
            if np.sum(rain > rain_thresh) < min_rain_swath:
                continue

            if area:
                inbox = ((lont > area[0]) & (lont < area[1])
                         & (latt > area[2]) & (latt < area[3]))

                # minimum pixel overlap with TRMM and box (50000km2); this
                # also rejects a swath with no pixel inside the box.
                if np.sum(inbox) < min_tpixel:
                    continue
                # minimum rainfall in defined box
                if np.sum(rain[inbox] > rain_thresh) < min_rain_box:
                    continue

            fpaths.append(rain_str)
            # The date is encoded at a fixed offset from the end of the file
            # name (...<yyyymmdd>.<5 characters>.7.gra).
            # pd.Timestamp is used because pd.datetime no longer exists in
            # current pandas.
            dates.append(
                pd.Timestamp(year=int(rain_str[-20:-16]),
                             month=int(rain_str[-16:-14]),
                             day=int(rain_str[-14:-12]),
                             hour=t.hour,
                             minute=t.minute,
                             second=0))

        self.fpaths = fpaths
        # Explicit dtype so an empty result is still a datetime Series.
        self.dates = pd.Series(dates, dtype='datetime64[ns]')
        self.__area = area
Example #6
0
    def __init__(self, trmm_folder, yrange=YRANGE, mrange=MRANGE, hod=HOD, area=None):
        """Collect the TRMM swath files that pass the time and rain filters.

        Files ending in ``_rain_f4.gra`` are searched under
        ``<trmm_folder>/<year>/<two-digit month>``.  For each of them the
        data file (name with ``_rain_f4`` removed) and the time file (name
        with ``_rain_f4`` replaced by ``_time``) are read.  A file is kept
        when

        * the time file exists and the hour of the mean scan time is in
          ``hod``,
        * at least 200 swath pixels have a rain value above 0.1, and
        * if ``area`` is given, at least 2500 swath pixels lie inside the
          box and at least 200 of them have a rain value above 0.1.

        The results are stored in ``self.fpaths`` (list of data file paths)
        and ``self.dates`` (``pd.Series`` of timestamps; the date is taken
        from the file name, hour and minute from the mean scan time).  If
        nothing is found a message is printed and both are left empty.

        Args:
            trmm_folder: Root directory of the TRMM files.
            yrange: Iterable of years to scan.
            mrange: Iterable of months to scan.
            hod: Container of accepted hours of day.
            area: Optional box ``(lon_min, lon_max, lat_min, lat_max)``;
                the comparisons are strict.

        Raises:
            NotADirectoryError: If ``trmm_folder`` is not a directory.
        """
        min_rain_swath = 200
        min_rain_box = 200
        min_tpixel = 2500
        rain_thresh = 0.1

        # Raise instead of calling quit(): a constructor must not terminate
        # the interpreter of whoever is using the class.
        if not os.path.isdir(trmm_folder):
            raise NotADirectoryError('Not a directory: ' + str(trmm_folder))

        rfiles = []
        for yr, mo in itertools.product(yrange, mrange):  # rain_f4 files only available for 6 to 10

            tpath = os.path.join(trmm_folder, str(yr), str(mo).zfill(2))
            try:
                files = uarr.locate('_rain_f4.gra', tpath)
            except OSError:
                continue

            rfiles.extend(files)

        rfiles.sort(key=ul.natural_keys)

        # No early return here: falling through keeps the instance fully
        # initialised (empty fpaths / dates) instead of leaving it without
        # its attributes.
        if not rfiles:
            print('No trmm files found')

        fpaths = []
        dates = []

        for eachfile in rfiles:
            rain_str = eachfile.replace('_rain_f4', '')
            time_str = eachfile.replace('_rain_f4', '_time')

            # Skip, rather than abort the whole scan, when the time file is
            # missing or its mean cannot be converted to a time of day.
            try:
                rr = np.fromfile(time_str, dtype=np.float32)  # seconds of day
            except FileNotFoundError:
                print(time_str + ' missing, continue')
                continue

            secmean = rr.mean()
            try:
                t = ut.sec_to_time(secmean)
            except ValueError:
                print('ValueError sec to time')
                continue

            if t.hour not in hod:
                continue

            raw = np.fromfile(rain_str, dtype=np.int16)
            x = 49  # trmm swath is always 49 wide
            single = raw.size // 4  # variables lon lat rainrate flag
            y = single // x

            # Stored as scaled integers: lon/lat * 100, rain * 10.
            lont = np.resize(raw[0:single], (y, x)) / 100.
            latt = np.resize(raw[single:2 * single], (y, x)) / 100.
            rain = np.resize(raw[2 * single:3 * single], (y, x)) / 10.

            # minimum TRMM rainfall > 0.1 in swath
            if np.sum(rain > rain_thresh) < min_rain_swath:
                continue

            if area:
                inbox = ((lont > area[0]) & (lont < area[1])
                         & (latt > area[2]) & (latt < area[3]))

                # minimum pixel overlap with TRMM and box (50000km2); this
                # also rejects a swath with no pixel inside the box.
                if np.sum(inbox) < min_tpixel:
                    continue
                # minimum rainfall in defined box
                if np.sum(rain[inbox] > rain_thresh) < min_rain_box:
                    continue

            fpaths.append(rain_str)
            # The date is encoded at a fixed offset from the end of the file
            # name (...<yyyymmdd>.<5 characters>.7.gra).
            # pd.Timestamp is used because pd.datetime no longer exists in
            # current pandas.
            dates.append(
                pd.Timestamp(year=int(rain_str[-20:-16]),
                             month=int(rain_str[-16:-14]),
                             day=int(rain_str[-14:-12]),
                             hour=t.hour,
                             minute=t.minute,
                             second=0))

        self.fpaths = fpaths
        # Explicit dtype so an empty result is still a datetime Series.
        self.dates = pd.Series(dates, dtype='datetime64[ns]')
        self.__area = area