Example #1
def run():
    #  (1174, 378)
    msg_folder = '/users/global/cornkle/data/OBS/meteosat_WA30'
    pool = multiprocessing.Pool(processes=7)

    m = msg.ReadMsg(msg_folder, y1=2006, y2=2010)
    files  = m.fpath

    #files = files[1050:1057]
    mdic = m.read_data(files[0])
    # make salem grid
    grid = u_grid.make(mdic['lon'].values, mdic['lat'].values, 5000) #m.lon, m.lat, 5000)


    files_str = []

    for f in files:
        # if f[-8:-6] != 6:
        #     continue
        files_str.append(f[0:-8])  # keep only daily file names; hours and minutes are handled inside the loop


    files_str = np.unique(files_str)

    passit = []
    for f in files_str:
        passit.append((grid,m, f))


    # res = pool.map(file_loop, passit)
    #
    # pool.close()
    for p in passit:
        file_loop(p)
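
Note that this variant creates a Pool but dispatches serially (the pool.map call is commented out), a common debugging switch. A minimal sketch of the task-tuple pattern, assuming a hypothetical top-level worker file_loop that unpacks a (grid, m, f) tuple and returns a result or None:

import multiprocessing

def file_loop(task):
    # hypothetical stand-in for the worker defined elsewhere in the module;
    # it must be a module-level function so multiprocessing can pickle it
    grid, reader, fname = task
    return fname  # placeholder: the real worker reads, regrids and returns data or None

if __name__ == '__main__':
    passit = [(None, None, f) for f in ['a', 'b', 'c']]
    serial = [file_loop(p) for p in passit]          # serial path, easy to step through
    with multiprocessing.Pool(processes=2) as pool:  # parallel path
        parallel = pool.map(file_loop, passit)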
Example #2
def run():

    msg_folder = cnst.network_data + 'data/OBS/meteosat_WA30'
    #msg_folder

    for yy in range(2013,2014):   # (2004,2016)

        for mm in [9]:

            pool = multiprocessing.Pool(processes=6)

            m = msg.ReadMsg(msg_folder, y1=yy, y2=yy, months=[mm])
            files  = m.fpath

            mdic = m.read_data(files[0], llbox=[-15, 1.5, 5, 11.5])  #[-14, 2.5, 4, 11.5]

            # make salem grid
            grid = u_grid.make(mdic['lon'].values, mdic['lat'].values, 5000)
            inds, weights, shape = u_int.interpolation_weights_grid(mdic['lon'].values, mdic['lat'].values, grid)
            gridd = (inds,weights,shape, grid)

            files_str = []

            for f in files:
                files_str.append(f[0:-6])

            files_str = np.unique(files_str)

            passit = []
            for f in files_str:
                passit.append((gridd,m, f))
            #
            res = pool.map(file_loop, passit)

            # for l in passit:
            #
            #     test = file_loop(l)

            pool.close()

            res = [x for x in res if x is not None]

            ds = xr.concat(res, 'time')
            path =  '/prj/vera/cores/' # cnst.network_data + 'MCSfiles/VERA_blobs/'
            savefile = path + 'test_size2_'+str(yy) + '_'+str(mm).zfill(2)+'.nc'#'blobMap_-40-700km2_-50-points_dominant_'+str(yy) + '_'+str(mm).zfill(2)+'.nc'

            try:
                os.remove(savefile)
            except OSError:
                pass
            #da.name = 'blob'
            #enc = {'blob': {'complevel': 5, 'zlib': True}}

            comp = dict(zlib=True, complevel=5)
            enc = {var: comp for var in ds.data_vars}

            ds.to_netcdf(path=savefile, mode='w', encoding=enc, format='NETCDF4')

            print('Saved ' + savefile)
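
The per-variable compression encoding used above is standard xarray. A self-contained sketch with a dummy dataset (assumes a netCDF4 backend is installed):

import numpy as np
import xarray as xr

ds = xr.Dataset({'t': (('y', 'x'), np.random.rand(4, 5)),
                 'p': (('y', 'x'), np.random.rand(4, 5))})
comp = dict(zlib=True, complevel=5)         # same settings as above
enc = {var: comp for var in ds.data_vars}   # one encoding entry per variable
ds.to_netcdf('example.nc', mode='w', encoding=enc, format='NETCDF4')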
Example #3
def run():

    msg_folder = '/users/global/cornkle/data/OBS/meteosat_WA30'
    pool = multiprocessing.Pool(processes=7)

    m = msg.ReadMsg(msg_folder)
    files = m.fpath

    mdic = m.read_data(files[0], llbox=[-11, 11, 9, 20])
    # make salem grid
    grid = u_grid.make(mdic['lon'].values, mdic['lat'].values,
                       5000)  #m.lon, m.lat, 5000)
    inds, weights, shape = u_int.interpolation_weights_grid(
        mdic['lon'].values, mdic['lat'].values, grid)
    gridd = (inds, weights, shape, grid)

    files_str = []

    for f in files:
        files_str.append(f[0:-6])

    files_str = np.unique(files_str)

    passit = []
    for f in files_str:
        passit.append((gridd, m, f))

    res = pool.map(file_loop, passit)

    # for l in passit:
    #
    #     test = file_loop(l)

    pool.close()

    res = [x for x in res if x is not None]

    da = xr.concat(res, 'time')

    savefile = '/users/global/cornkle/MCSfiles/blob_map_MCSs_-50_JJAS_points_dominant_gt15k.nc'

    try:
        os.remove(savefile)
    except OSError:
        pass
    da.name = 'blob'
    enc = {'blob': {'complevel': 5, 'zlib': True}}
    da.to_netcdf(path=savefile, mode='w', encoding=enc, format='NETCDF4')

    print('Saved ' + savefile)
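
Workers return None for skipped dates, so results are filtered before concatenation. A minimal sketch of that collect-and-stack step, with synthetic arrays standing in for file_loop output:

import numpy as np
import pandas as pd
import xarray as xr

res = [xr.DataArray(np.random.rand(2, 2), dims=('y', 'x'),
                    coords={'time': pd.Timestamp('2011-06-12') + pd.Timedelta(hours=h)})
       for h in range(3)]
res.insert(1, None)                      # a skipped file yields None
res = [x for x in res if x is not None]  # drop failures
da = xr.concat(res, 'time')              # scalar 'time' coords become a dimension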
Example #4
def run():
    #  (1174, 378)
    msg_folder = '/users/global/cornkle/data/OBS/meteosat_WA30'
    pool = multiprocessing.Pool(processes=6)

    m = msg.ReadMsg(msg_folder, y1=2006, y2=2010)
    files  = m.fpath

    #files = files[1050:1057]
    mdic = m.read_data(files[0], llbox=[-11, 11, 9, 20])
    # make salem grid
    grid = u_grid.make(mdic['lon'].values, mdic['lat'].values, 5000) #m.lon, m.lat, 5000)
    inds, weights, shape = u_int.interpolation_weights_grid(mdic['lon'].values, mdic['lat'].values, grid)
    gridd = (inds,weights,shape, grid)

    files_str = []

    for f in files:
        files_str.append(f[0:-6])

    files_str = np.unique(files_str)

    passit = []
    for f in files_str:
        passit.append((gridd,m, f))

    res = pool.map(file_loop, passit)

    # for l in passit[0:24]:
    #
    #     test = file_loop(l)

    pool.close()

    res = [x for x in res if x is not None]

    da = xr.concat(res, 'time')
    savefile = '/users/global/cornkle/MCSfiles/blob_map_JJAS_-70CentreMass_GT5000k.nc'

    try:
        os.remove(savefile)
    except OSError:
        pass
    da.name = 'blob'
    enc = {'blob': {'complevel': 5, 'zlib': True}}
    da.to_netcdf(path=savefile, mode='w') #encoding=enc, format='NETCDF4'

    print('Saved ' + savefile)
Example #5
def save_values_regrid():
    trmm_folder = "/users/global/cornkle/data/OBS/TRMM/trmm_swaths_WA/"
    msg_folder = '/users/global/cornkle/data/OBS/meteosat_tropWA' #meteosat_WA30'

    t = trmm_clover.ReadWA(trmm_folder, yrange=YRANGE, area=[-14, 12, 4, 9])   # [-15, 15, 4, 21], [-10, 10, 10, 20]
    value_list = []
    for _y, _m, _d, _h, _mi in zip(t.dates.dt.year,  t.dates.dt.month, t.dates.dt.day, t.dates.dt.hour, t.dates.dt.minute):

        if (_h <10) | (_h>19):
            continue

        date = dt.datetime(_y, _m, _d, _h, _mi)

        tdic = t.get_ddata(date, cut=[4.3, 8])
        lon = np.arange(-14,13,1)
        lat = np.arange(4,9,1)
        # make salem grid

        grid = u_grid.make(lon, lat, 5000)
        lon, lat = grid.ll_coordinates

        # interpolate TRM and MSG to salem grid
        inter, tpoints = u_grid.griddata_input(tdic['lon'].values, tdic['lat'].values, grid)

        # Interpolate TRMM using delaunay triangularization
        try:
            dummyt = griddata(tpoints, tdic['p'].values.flatten(), inter, method='linear')
        except ValueError:
            continue
        outt = dummyt.reshape((grid.ny, grid.nx))

        ##remove edges of interpolated TRMM
        for nb in range(5):
            boole = np.isnan(outt)
            outt[boole] = -1000
            grad = np.gradient(outt)
            outt[boole] = np.nan
            outt[abs(grad[1]) > 300] = np.nan
            outt[abs(grad[0]) > 300] = np.nan

        print('TRMM:', date)

        value_list.extend((outt)[outt>=0.1].flatten())

    pkl.dump(np.array(value_list), open('/users/global/cornkle/data/CLOVER/saves/trmm_values_2004-2016MAM_10-19UTC_-14W12E_4-9N_0.1mm_regrid.p', 'wb'))
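
The edge-removal loop above relies on a gradient trick: NaNs are temporarily replaced by a -1000 sentinel so np.gradient blows up at the data/no-data boundary, and those steep pixels are discarded; five passes peel off several rows of suspect edge pixels. A standalone sketch on synthetic data:

import numpy as np

outt = np.random.rand(10, 10) * 50.0
outt[:, 7:] = np.nan                      # pretend the swath ends here

for nb in range(5):
    boole = np.isnan(outt)
    outt[boole] = -1000                   # sentinel far below any real value
    grad = np.gradient(outt)              # [d/dy, d/dx]
    outt[boole] = np.nan                  # restore the gaps
    outt[np.abs(grad[1]) > 300] = np.nan  # drop steep x-gradients
    outt[np.abs(grad[0]) > 300] = np.nan  # drop steep y-gradients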
Example #6
def rewrite_topo():
    path = '/home/ck/DIR/cornkle/data/ancils_python/'
    file = path + 'gtopo_1min_afr.nc'
    topo = xr.open_dataset(file)

    grid = u_grid.make(topo['lon'].values, topo['lat'].values,
                       3000)  #m.lon, m.lat, 5000)
    outtopo = grid.lookup_transform(topo['h'])
    lon, lat = grid.ll_coordinates

    da = xr.DataArray(outtopo,
                      coords={
                          'lat': lat[:, 0],
                          'lon': lon[0, :]
                      },
                      dims=['lat', 'lon'])  # .isel(time=0)
    da.name = 'h'

    da.to_netcdf(path=path + 'gtopo_3km_WA.nc', mode='w', format='NETCDF4')
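
On the regular grid returned by u_grid.make, every row shares its latitudes and every column its longitudes, which is why one row and one column of the 2-D coordinate arrays suffice as 1-D DataArray coordinates. A sketch of that construction with synthetic coordinates:

import numpy as np
import xarray as xr

lon2d, lat2d = np.meshgrid(np.linspace(-10, 10, 5), np.linspace(4, 9, 4))
field = np.random.rand(4, 5)
da = xr.DataArray(field,
                  coords={'lat': lat2d[:, 0], 'lon': lon2d[0, :]},
                  dims=['lat', 'lon'])
da.name = 'h'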
Example #7
def saveMCS_WA15():
    trmm_folder = "/users/global/cornkle/data/OBS/TRMM/trmm_swaths_WA/"
    msg_folder = '/users/global/cornkle/data/OBS/meteosat_tropWA'  #meteosat_WA30'

    t = trmm_clover.ReadWA(trmm_folder, yrange=YRANGE,
                           area=[-13, 13, 4,
                                 8])  # [-15, 15, 4, 21], [-10, 10, 10, 20]
    m = msg.ReadMsg(msg_folder)

    cnt = 0

    # define the "0 lag" first
    arr = np.array([15, 30, 45, 60, 0])
    #mon = [3,4,5] # months march april may only
    # cycle through TRMM dates - only dates that have a certain number of pixels in llbox are considered
    for _y, _m, _d, _h, _mi in zip(t.dates.dt.year, t.dates.dt.month,
                                   t.dates.dt.day, t.dates.dt.hour,
                                   t.dates.dt.minute):

        if (_h < 10) | (_h > 21):
            continue

        if (_m < 9) | (_m > 10):
            continue

        date = dt.datetime(_y, _m, _d, _h, _mi)

        tdic = t.get_ddata(date, cut=[4, 8])

        #get closest minute
        dm = arr - _mi
        dm = dm[dm < 0]
        try:
            ind = (np.abs(dm)).argmin()
        except ValueError:
            continue

        # set zero shift time for msg

        dt0 = dm[ind]
        ndate = date + dt.timedelta(minutes=int(dt0))
        m.set_date(ndate.year, ndate.month, ndate.day, ndate.hour,
                   ndate.minute)

        mdic = m.get_data(llbox=[
            tdic['lon'].values.min(), tdic['lon'].values.max(),
            tdic['lat'].values.min(), tdic['lat'].values.max()
        ])

        # check whether date is completely missing or just 30mins interval exists
        # if str(date) == '2004-05-02 13:15:00':
        #     pdb.set_trace()
        if not mdic:
            dm = np.delete(dm, np.argmin(np.abs(dm)), axis=0)
            try:
                dummy = np.min(np.abs(dm)) > 15
            except ValueError:
                continue
            if dummy:
                print('Date missing')
                continue
            ind = (np.abs(dm)).argmin()
            dt0 = dm[ind]
            ndate = date + dt.timedelta(minutes=int(dt0))
            m.set_date(ndate.year, ndate.month, ndate.day, ndate.hour,
                       ndate.minute)
            mdic = m.get_data(llbox=[
                tdic['lon'].values.min(), tdic['lon'].values.max(),
                tdic['lat'].values.min(), tdic['lat'].values.max()
            ])

            if not mdic:
                print('Date missing')
                continue

        print('TRMM:', date, 'MSG:', ndate.year, ndate.month, ndate.day,
              ndate.hour, ndate.minute)

        lon1 = mdic['lon'].values
        lat1 = mdic['lat'].values
        mdic['t'].values[mdic['t'].values >= -40] = 0  # T threshold -40
        labels, numL = label(mdic['t'].values)

        u, inv = np.unique(labels, return_inverse=True)
        n = np.bincount(inv)

        goodinds = u[n > 556]  # minimum MCS size: 5000 km2 = 556 pixels at ~3x3 km resolution (350 km2 would be 39 pixels)
        print(goodinds)
        if not sum(goodinds) > 0:
            continue

        for gi in goodinds:
            if gi == 0:  # index 0 is always background, ignore!
                continue

            inds = np.where(labels == gi)

            # cut a box for every single blob from msg - get min max lat lon of the blob, cut upper lower from TRMM to match blob
            latmax, latmin = mdic['lat'].values[inds].max(), mdic['lat'].values[inds].min()
            lonmax, lonmin = mdic['lon'].values[inds].max(), mdic['lon'].values[inds].min()
            mmeans = np.percentile(mdic['t'].values[inds], 90)
            td = t.get_ddata(date, cut=[latmin - 1, latmax + 1])

            # ensure minimum trmm rainfall in area
            # if len(np.where(td['p'].values > 0.1)[0]) < 1:  # at least 1 pixel with rainfall
            #     print('Kickout: TRMM min pixel < 1')
            #     continue

            dt0 = dm[ind]
            ndate = date + dt.timedelta(minutes=int(dt0))

            # if (ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute) == (2006, 6, 6, 5, 0):
            #     ipdb.set_trace()

            ml0 = m.get_data(
                llbox=[lonmin - 1, lonmax + 1, latmin - 1, latmax + 1])
            if not ml0:
                continue

            #make salem grid
            grid = u_grid.make(ml0['lon'].values, ml0['lat'].values, 5000)
            lon, lat = grid.ll_coordinates

            # interpolate TRM and MSG to salem grid
            inter, mpoints = u_grid.griddata_input(ml0['lon'].values,
                                                   ml0['lat'].values, grid)
            inter, tpoints = u_grid.griddata_input(td['lon'].values,
                                                   td['lat'].values, grid)

            # Interpolate TRMM using delaunay triangularization
            try:
                dummyt = griddata(tpoints,
                                  td['p'].values.flatten(),
                                  inter,
                                  method='linear')
            except ValueError:
                continue
            outt = dummyt.reshape((grid.ny, grid.nx))
            # if len(np.where(outt > 0.1)[0]) < 2:  # at least 2 pixel with rainfall
            #     print('Kickout: TRMM wavelet min pixel pcp < 2')
            #     continue

            if np.sum(np.isfinite(outt)) < 5:  # require at least 5 valid pixels
                print('Kickout: TRMM valid pixels < 5')
                continue

            # Interpolate TRMM flags using nearest
            dummyf = griddata(tpoints,
                              td['flags'].values.flatten(),
                              inter,
                              method='nearest')
            outf = dummyf.reshape((grid.ny, grid.nx))
            outf = outf.astype(float)  # np.float was removed in NumPy 1.24; use the builtin
            isnot = np.isnan(outt)
            outf[isnot] = np.nan

            ##remove edges of interpolated TRMM
            for nb in range(5):
                boole = np.isnan(outt)
                outt[boole] = -1000
                grad = np.gradient(outt)
                outt[boole] = np.nan
                outt[abs(grad[1]) > 300] = np.nan
                outt[abs(grad[0]) > 300] = np.nan
                outf[abs(grad[1]) > 300] = np.nan
                outf[abs(grad[0]) > 300] = np.nan

            #get convective rainfall only
            outff = tm_utils.getTRMMconv(outf)
            outk = outt.copy() * 0
            outk[np.where(outff)] = outt[np.where(outff)]

            # Interpolate MSG using delaunay triangularization
            dummy = griddata(mpoints,
                             ml0['t'].values.flatten(),
                             inter,
                             method='linear')
            dummy = dummy.reshape((grid.ny, grid.nx))
            outl = np.full_like(dummy, np.nan)
            xl, yl = grid.transform(lon1[inds],
                                    lat1[inds],
                                    crs=salem.wgs84,
                                    nearest=True,
                                    maskout=True)
            outl[yl.compressed(), xl.compressed()] = dummy[yl.compressed(),
                                                           xl.compressed()]

            # #### SHIFTING WITH RESPECT TO MIN T / MAX P - search for Pmax within 20km from Tmin, shift TRMM image
            #
            # tmin = np.argmin(outl)
            # pmax =
            #
            # dist =
            #

            tmask = np.isfinite(outt)
            mmask = np.isfinite(outl)
            mask2 = np.isfinite(outl[tmask])

            if (sum(mmask.flatten()) * 25 < 350) | (outt.max() > 200):  # or (sum(mmask.flatten())*25 > 1500000) or (outt.max()<0.1)
                continue

            if sum(mask2.flatten()) < 5:  # sum(mmask.flatten())*0.3:
                print('Kickout: TRMM MSG overlap less than 5 pixels of cloud area')
                continue

            print('Hit:', gi)

            da = xr.Dataset(
                {
                    'p': (['x', 'y'], outt),
                    'pconv': (['x', 'y'], outk),
                    't_lag0': (['x', 'y'], dummy),
                    'tc_lag0': (['x', 'y'], outl),
                },
                coords={
                    'lon': (['x', 'y'], lon),
                    'lat': (['x', 'y'], lat),
                    'time': date
                })
            da.attrs['lag0'] = dt0
            da.attrs['meanT'] = np.mean(outl[mmask])
            da.attrs['T90perc'] = mmeans
            da.attrs['meanT_cut'] = np.mean(outl[tmask][mask2])
            da.attrs['area'] = sum(mmask.flatten())
            da.attrs['area_cut'] = sum(mask2)
            da.close()
            savefile = '/users/global/cornkle/MCSfiles/WA5000_4-8N_13W-13E_-40_18UTC/' + date.strftime(
                '%Y-%m-%d_%H:%M:%S') + '_' + str(gi) + '.nc'
            try:
                os.remove(savefile)
            except OSError:
                pass
            da.to_netcdf(path=savefile, mode='w')
            print('Saved ' + savefile)

            cnt = cnt + 1

    print('Saved ' + str(cnt) + ' MCSs as netcdf.')
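
The closest-minute matching works by subtracting the TRMM overpass minute from the candidate MSG slot minutes and keeping only earlier slots (negative offsets); the least-negative offset gives the "zero lag". An empty result (e.g. _mi == 0) makes argmin raise ValueError, which is why the loop wraps it in try/except. A worked sketch:

import numpy as np
import datetime as dt

arr = np.array([15, 30, 45, 60, 0])  # candidate MSG slot minutes
_mi = 22                             # example TRMM overpass minute
dm = arr - _mi                       # [-7, 8, 23, 38, -22]
dm = dm[dm < 0]                      # keep earlier slots: [-7, -22]
ind = np.abs(dm).argmin()            # closest earlier slot
dt0 = dm[ind]                        # -7 minutes
date = dt.datetime(2006, 6, 1, 13, _mi)
ndate = date + dt.timedelta(minutes=int(dt0))  # 13:15, the matched MSG slot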
Example #8
def saveMCS_WA15(YRANGE):
    trmm_folder = cnst.network_data + 'data/OBS/IMERG_HQ_precip'
    msg_folder = cnst.network_data + 'data/OBS/MSG_WA30' #meteosat_WA30'
    msg_folder2 = cnst.network_data + 'data/OBS/MSG_MAMON'

    mJJAS = msg.ReadMsg(msg_folder)
    mMAMON = msg.ReadMsg(msg_folder2)
    cnt = 0
    for _y in range(YRANGE, YRANGE+1):
        for _m in range(3,12):

            files = glob.glob(trmm_folder + '/'+str(_y) + '/'+str(_m).zfill(2) +'/*.nc4')# area=[-12, 12, 4, 9])   # [-15, 15, 4, 21], [-10, 10, 10, 20]

            for tf in files:


                t = xr.open_dataset(tf)

                _h = t['time.hour'].values[0]

                _d = t['time.day'].values[0]
                _mi = t['time.minute'].values[0]

                if (_h <15) | (_h>21):
                    print('Wrong hour')
                    continue

                if (_m<3) | (_m>11):
                    print('Wrong month')
                    continue

                da = t['HQprecipitation'].squeeze()
                da = da.T
                tdic = da.sel(lat=slice(5,25), lon=slice(-17,15))   #[-12, 15, 5, 25]

                if np.sum(tdic.values) <= 0.01:
                    continue

                if _m in [3,4,5,10,11]:
                    m = mMAMON
                else:
                    m = mJJAS

                date = dt.datetime(_y, _m, _d, _h, _mi)
                arr = np.array([15, 30, 45, 60, 0])

                #get closest minute
                dm = arr - _mi
                if (dm<0).any():
                    dm = dm[dm<0]

                try:
                    ind = (np.abs(dm)).argmin()
                except ValueError:
                    continue

                # set zero shift time for msg


                dt0 = dm[ind]
                ndate = date + dt.timedelta(minutes=int(dt0))
                m.set_date(ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute)

                mdic = m.get_data(llbox=[tdic['lon'].values.min(),  tdic['lon'].values.max(), tdic['lat'].values.min(),tdic['lat'].values.max()])

                # check whether date is completely missing or just 30mins interval exists
                # if str(date) == '2004-05-02 13:15:00':
                #     pdb.set_trace()
                if not mdic:
                    dm = np.delete(dm, np.argmin(np.abs(dm)), axis=0)
                    try:
                        dummy = np.min(np.abs(dm))> 15
                    except ValueError:
                        continue
                    if dummy:
                        print('Date missing')
                        continue
                    ind = (np.abs(dm)).argmin()
                    dt0 = dm[ind]
                    ndate = date + dt.timedelta(minutes=int(dt0))
                    m.set_date(ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute)
                    mdic = m.get_data(llbox=[tdic['lon'].values.min(),  tdic['lon'].values.max(), tdic['lat'].values.min(),tdic['lat'].values.max()])

                    if not mdic:
                        print('Date missing')
                        continue

                print('TRMM:', date, 'MSG:', ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute )

                lon1 = mdic['lon'].values
                lat1 = mdic['lat'].values
                mdic['t'].values[mdic['t'].values >= -50] = 0  # T threshold -50
                labels, numL = label(mdic['t'].values)

                u, inv = np.unique(labels, return_inverse=True)
                n = np.bincount(inv)

                goodinds = u[n > 556]  # minimum MCS size: 5000 km2 = 556 pixels at ~3x3 km resolution (350 km2 would be 39 pixels)
                print(goodinds)
                if not sum(goodinds) > 0:
                    continue

                for gi in goodinds:
                    if gi == 0:  # index 0 is always background, ignore!
                        continue

                    inds = np.where(labels == gi)

                    # cut a box for every single blob from msg - get min max lat lon of the blob, cut upper lower from TRMM to match blob
                    latmax, latmin = mdic['lat'].values[inds].max(), mdic['lat'].values[inds].min()
                    lonmax, lonmin = mdic['lon'].values[inds].max(), mdic['lon'].values[inds].min()
                    mmeans = np.percentile(mdic['t'].values[inds], 90)
                    #td = tdic.values #t.get_ddata(date, cut=[latmin - 1, latmax + 1])

                    # ensure minimum trmm rainfall in area
                    # if len(np.where(td['p'].values > 0.1)[0]) < 1:  # at least 1 pixel with rainfall
                    #     print('Kickout: TRMM min pixel < 1')
                    #     continue

                    dt0 = dm[ind]
                    ndate = date + dt.timedelta(minutes=int(dt0))

                    # if (ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute) == (2006, 6, 6, 5, 0):
                    #     ipdb.set_trace()

                    ml0 = m.get_data(llbox=[lonmin - 1,  lonmax + 1, latmin - 1, latmax + 1])
                    if not ml0:
                        continue

                    #make salem grid
                    grid = u_grid.make(ml0['lon'].values, ml0['lat'].values,5000)
                    lon, lat = grid.ll_coordinates

                    # interpolate TRM and MSG to salem grid
                    inter, mpoints = u_grid.griddata_input(ml0['lon'].values, ml0['lat'].values,grid)

                    try:
                        outt = u_grid.quick_regrid(tdic['lon'].values, tdic['lat'].values, tdic.values, grid)
                    except ValueError:
                        continue

                    if np.sum(np.isfinite(outt)) < 5:  # require at least 5 valid pixels
                        print('Kickout: TRMM valid pixels < 5')
                        continue

                    ##remove edges of interpolated TRMM
                    for nb in range(5):
                        boole = np.isnan(outt)
                        outt[boole] = -1000
                        grad = np.gradient(outt)
                        outt[boole] = np.nan
                        outt[abs(grad[1]) > 300] = np.nan
                        outt[abs(grad[0]) > 300] = np.nan


                    # Interpolate MSG using delaunay triangularization
                    dummy = griddata(mpoints, ml0['t'].values.flatten(), inter, method='linear')
                    dummy = dummy.reshape((grid.ny, grid.nx))
                    outl = np.full_like(dummy, np.nan)
                    xl, yl = grid.transform(lon1[inds], lat1[inds], crs=salem.wgs84, nearest=True, maskout=True)
                    outl[yl.compressed(), xl.compressed()] = dummy[yl.compressed(), xl.compressed()]

                    # #### SHIFTING WITH RESPECT TO MIN T / MAX P - search for Pmax within 20km from Tmin, shift TRMM image
                    #
                    # tmin = np.argmin(outl)
                    # pmax =
                    #
                    # dist =
                    #

                    tmask = np.isfinite(outt)
                    mmask = np.isfinite(outl)
                    mask2 = np.isfinite(outl[tmask])

                    if (sum(mmask.flatten())*25 < 350) | (outt.max()>250):# or (sum(mmask.flatten())*25 > 1500000): #or (outt.max()<0.1)
                        continue

                    if sum(mask2.flatten()) < 5:  # sum(mmask.flatten())*0.3:
                        print('Kickout: TRMM MSG overlap less than 5 pixels of cloud area')
                        continue

                    print('Hit:', gi)

                    da = xr.Dataset({'p': (['x', 'y'], outt),
                                     't_lag0': (['x', 'y'], dummy),
                                     'tc_lag0': (['x', 'y'], outl),
                                     },
                                    coords={'lon': (['x', 'y'], lon),
                                            'lat': (['x', 'y'], lat),
                                            'time': date})
                    da.attrs['lag0'] = dt0
                    da.attrs['meanT'] = np.mean(outl[mmask])
                    da.attrs['T90perc'] = mmeans
                    da.attrs['meanT_cut'] = np.mean(outl[tmask][mask2])
                    da.attrs['area'] = sum(mmask.flatten())
                    da.attrs['area_cut'] = sum(mask2)
                    da.close()
                    savefile = cnst.network_data + 'MCSfiles/WA5000_5-25N_12W-15E_-50_afternoon_GPM/' + date.strftime('%Y-%m-%d_%H:%M:%S') + '_' + str(gi) + '.nc'
                    try:
                        os.remove(savefile)
                    except OSError:
                        print('OSError, no dir?')
                        pass
                    da.to_netcdf(path=savefile, mode='w')
                    print('Saved ' + savefile)

                    cnt = cnt + 1

            print('Saved ' + str(cnt) + ' MCSs as netcdf.')
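
When the matched MSG slot is missing, the fallback above discards the closest offset and retries with the next-best one, but only if that slot is within 15 minutes; otherwise the overpass is dropped. A condensed sketch of that fallback:

import numpy as np

dm = np.array([-7, -22])                    # offsets; the -7 slot turned out missing
dm = np.delete(dm, np.argmin(np.abs(dm)))   # discard the missing closest slot
if dm.size == 0 or np.min(np.abs(dm)) > 15:
    print('Date missing')                   # next-best slot too far away: give up
else:
    dt0 = dm[np.abs(dm).argmin()]           # retry with the second-closest slot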
Example #9
def run(datastring):

    #msg_folder = cnst.network_data + 'data/OBS/meteosat_WA30'
    #ext_drive = '/media/ck/Seagate/DIR/'#
    #local_data = ext_drive + 'mymachine/'
    #network_data = ext_drive + 'cornkle/'

    #msg_folder = network_data + 'data/OBS/meteosat_WA30'

    for yy in range(2004,2006):   # (2004,2016)MSG, (1983,2006) MFG

        for mm in [6,7,8,9]:
            #
            # yy = 1999
            # mm = 9
            # datastring = 'mfg'

            pool = multiprocessing.Pool(processes=4)

            msg_folder = cnst.network_data + 'data/OBS/meteosat_WA30'
            #ext_drive = '/media/ck/Seagate/DIR/'#
            #local_data = ext_drive + 'mymachine/'
            #network_data = ext_drive + 'cornkle/'
            #msg_folder = network_data + 'data/OBS/meteosat_WA30'

            m = msg.ReadMsg(msg_folder, y1=yy, y2=yy, months=[mm])

            files  = m.fpath

            gridll = pkl.load( open (cnst.network_data + 'data/OBS/saves/VERA_msg_latlon_18W12E_1N17N.p', 'rb'))

            mdic = m.read_data(files[0], llbox=[-25, 20, 2, 25])  #[-14, 2.5, 4, 11.5]

            # make salem grid
            grid = u_grid.make(gridll['lon'].values, gridll['lat'].values, 10000)
            inds, weights, shape = u_int.interpolation_weights_grid(mdic['lon'].values, mdic['lat'].values, grid)



            gridd = (inds,weights,shape, grid)

            files_str = []

            for f in files:
                if datastring == 'msg':
                    files_str.append(f[0:-4])
                else:
                    files_str.append(f)

            files_str = np.unique(files_str)

            passit = []
            for f in files_str:
                passit.append((gridd,m, f,datastring))

            #res = pool.map(file_loop, passit)

            res=[]
            for l in passit:

                res.append(file_loop(l))

            pool.close()

            res = [x for x in res if x is not None]

            ds = xr.concat(res, 'time')
            path =  '/prj/vera/cores/'
            savefile = path + 'cores_10k_-40_700km2_-50points_dominant_'+str(yy) + '_'+str(mm).zfill(2)+'.nc'#'blobMap_-40-700km2_-50-points_dominant_'+str(yy) + '_'+str(mm).zfill(2)+'.nc'

            try:
                os.remove(savefile)
            except OSError:
                pass
            #da.name = 'blob'
            #enc = {'blob': {'complevel': 5, 'zlib': True}}

            comp = dict(zlib=True, complevel=5)
            enc = {var: comp for var in ds.data_vars}

            ds.to_netcdf(path=savefile, mode='w', encoding=enc, format='NETCDF4')
            print('Saved ' + savefile)
Example #10
def create_map_data():
    # read only trmm files that I need and give out proper lons lats etc
    files = "/users/global/cornkle/data/OBS/TRMM/trmm_swaths_WA/2011/06/2A25.20110612.77322.7.gra"

    # /2011/06/2A25.20110612.77322.7.gra"  good to show
    trr = np.fromfile(files, dtype=np.int16)
    x = 49
    nb = trr.size
    single = int(nb / 4)  # variables lon lat rainrate flag

    lons = trr[0:single]
    lats = trr[single:2 * single]
    rainrs = trr[2 * single:3 * single]
    flags = trr[3 * single:4 * single]

    y = int(lons.size / x)
    lons = np.resize(lons, (y, x))
    lats = np.resize(lats, (y, x))
    rainrs = np.resize(rainrs, (y, x))
    flags = np.resize(flags, (y, x))
    lon = lons / 100.
    lat = lats / 100.
    rainr = rainrs / 10.
    lonmin, lonmax = np.amin(lon), np.amax(lon)
    latmin, latmax = np.amin(lat), np.amax(lat)
    lonx = lon[0, :]
    laty = lat[:, 0]
    rainrs.shape

    path = "/users/global/cornkle/data/OBS/meteosat_WA30/cell_blob_files/2011/06/"  # 201106122130 good to show
    filename = "201106122130.gra"
    files = path + filename
    rrShape = (580, 1640)
    rrMDI = np.uint16()
    rr = np.fromfile(files, dtype=rrMDI.dtype)
    rr.shape = rrShape

    path = "/users/global/cornkle/data/OBS/meteosat_WA30/msg_raw_binary/2011/06/"  # 201106122130 good to show
    filename = "201106122130.gra"
    files = path + filename
    rrMDI = np.uint8(255)
    rr2 = np.fromfile(files, dtype=rrMDI.dtype)
    rr2.shape = rrShape
    rr2 = rr2.astype(np.int32) - 173

    msg_latlon = np.load(
        '/users/global/cornkle/data/OBS/meteosat_WA30/MSG_1640_580_lat_lon.npz'
    )
    mlon = msg_latlon['lon']
    mlat = msg_latlon['lat']

    # make salem grid
    grid = u_grid.make(mlon, mlat, 5000)
    xi, yi = grid.ij_coordinates
    glon, glat = grid.ll_coordinates

    # Transform lons, lats to grid
    xt, yt = grid.transform(lon.flatten(), lat.flatten(), crs=salem.wgs84)

    # Convert for griddata input
    tpoints = np.array((yt, xt)).T
    inter = np.array((np.ravel(yi), np.ravel(xi))).T

    # Interpolate using delaunay triangularization
    dummyt = griddata(tpoints, rainrs.flatten(), inter, method='linear')
    outt = dummyt.reshape((grid.ny, grid.nx))

    for nb in range(5):
        boole = np.isnan(outt)
        outt[boole] = -1000
        grad = np.gradient(outt)
        outt[boole] = np.nan
        outt[abs(grad[1]) > 300] = np.nan
        outt[abs(grad[0]) > 300] = np.nan

    xm, ym = grid.transform(mlon.flatten(), mlat.flatten(), crs=salem.wgs84)
    mpoints = np.array((ym, xm)).T
    out = griddata(mpoints, rr2.flatten(), inter, method='linear')
    outp = out.reshape((grid.ny, grid.nx))

    out = griddata(mpoints, rr.flatten(), inter, method='nearest')
    outb = out.reshape((grid.ny, grid.nx))

    data = xr.Dataset(
        {
            'trmm': (['lat', 'lon'], outt),
            'tir': (['lat', 'lon'], outp),
            'tblob': (['lat', 'lon'], outb)
        },
        coords={
            'lat': glat[:, 0],
            'lon': glon[0, :]
        })  #[np.newaxis, :]

    data.to_netcdf(
        '/users/global/cornkle/C_paper/wavelet/saves/maps/trmm_msg_map.nc')
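
The .gra swaths are flat int16 binaries holding four variables back to back (lon, lat, rain rate, flags), with coordinates scaled by 100 and rain by 10. A self-contained sketch that writes synthetic data in this layout and decodes it the same way:

import numpy as np

x, y = 49, 20
n = x * y
rec = np.concatenate([np.linspace(-1000, 1000, n),  # lons * 100
                      np.linspace(400, 900, n),     # lats * 100
                      np.full(n, 57),               # rain rate * 10
                      np.zeros(n)]).astype(np.int16)
rec.tofile('swath.gra')

trr = np.fromfile('swath.gra', dtype=np.int16)
single = trr.size // 4
lon = trr[0:single].reshape(y, x) / 100.
lat = trr[single:2 * single].reshape(y, x) / 100.
rainr = trr[2 * single:3 * single].reshape(y, x) / 10.
flags = trr[3 * single:4 * single].reshape(y, x)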
Example #11
da = xr.open_dataset(file)
da = da['treecover']  #.sel(LON=slice(-8,-5), LAT=slice(6,8))


#
# plt.figure(figsize=(9,7))
# plt.imshow(da[-1], origin='lower')
# plt.colorbar()
#


da.values[np.isnan(da.values)] = 0

grid = u_grid.make(da['LON'].values, da['LAT'].values, 0.05, keep_ll=True)
outt = grid.lookup_transform(da, return_lut=False, method=np.nanmean)

da_new = xr.DataArray(outt,
                      coords={
                          'time': np.arange(2000, 2018),
                          'lat': grid.xy_coordinates[1][:, 0],
                          'lon': grid.xy_coordinates[0][0, :]
                      },
                      dims=['time', 'lat', 'lon'])

da_new.to_netcdf('/prj/vera/cornkle/treefrac_0.05deg.nc')

#
# plt.figure(figsize=(9,7))
# plt.imshow(outt[0], origin='lower')
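
salem's lookup_transform aggregates all fine pixels that fall into each coarse cell, here with np.nanmean. For simple regular grids, a rough stand-in (not the author's method, just the block-mean idea) is xarray's coarsen:

import numpy as np
import xarray as xr

fine = xr.DataArray(np.random.rand(100, 100), dims=['lat', 'lon'],
                    coords={'lat': np.linspace(6, 8, 100),
                            'lon': np.linspace(-8, -5, 100)})
coarse = fine.coarsen(lat=10, lon=10, boundary='trim').mean()  # 10x10 block means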
Example #12
def saveMCS():
    trmm_folder = "/users/global/cornkle/data/OBS/TRMM/trmm_swaths_WA/"
    msg_folder = '/users/global/cornkle/data/OBS/meteosat_WA30'

    t = trmm.ReadWA(trmm_folder, yrange=YRANGE, area=[-15, 4, 20, 25])  # (ll_lon, ll_lat, ur_lon, ur_lat) define initial TRMM box and scan for swaths in that box
    m = msg.ReadMsg(msg_folder)

    cnt = 0

    # minute array to find closest MSG minute
    arr = np.array([15, 30, 45, 60, 0])

    # loop through TRMM dates - only dates that have a certain number of pixels in llbox are considered
    for _y, _m, _d, _h, _mi in zip(t.dates.y, t.dates.m, t.dates.d, t.dates.h, t.dates.mi):

        tdic = t.get_ddata(_y, _m, _d, _h, _mi, cut=[3,26]) # cut TRMM data at lower/upper lat
        #get value of closest minute
        dm = arr - _mi
        dm = dm[dm<0]
        try:
            ind = (np.abs(dm)).argmin()
        except ValueError:
            continue

        # set smallest lag time for msg
        date = dt.datetime(_y, _m, _d, _h, _mi)

        dt0 = dm[ind]
        ndate = date + dt.timedelta(minutes=int(dt0))
        m.set_date(ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute)
        mdic = m.get_data(llbox=[tdic['lon'].values.min(),  tdic['lat'].values.min(), tdic['lon'].values.max(),tdic['lat'].values.max()])

        # check whether date is completely missing or just 30mins interval exists
        if not mdic:
            dm = np.delete(dm, np.argmin(np.abs(dm)), axis=0)
            # try second closest minute
            try:
                dummy = np.min(np.abs(dm))> 15
            except ValueError:
                continue
            if dummy:
                print('Date missing')
                continue
            ind = (np.abs(dm)).argmin()
            dt0 = dm[ind]
            ndate = date + dt.timedelta(minutes=int(dt0))
            m.set_date(ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute)
            mdic = m.get_data(llbox=[tdic['lon'].values.min(), tdic['lat'].values.min(), tdic['lon'].values.max(),
                                     tdic['lat'].values.max()])
            if not mdic:
                print('Date missing')
                continue

        print('TRMM:', date, 'MSG:', ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute )

        lon1 = mdic['lon'].values # MSG coords
        lat1 = mdic['lat'].values
        mdic['t'].values[mdic['t'].values >= -10] = 0  # T threshold -10 for clouds
        ### filter minimum cloud size
        labels, numL = label(mdic['t'].values)
        u, inv = np.unique(labels, return_inverse=True)
        n = np.bincount(inv)
        goodinds = u[n > 39]  # defines minimum MCS size e.g. 9x39 ~ 350km2
        print(goodinds) # indices of clouds of "good size"

        if not sum(goodinds) > 0:
            continue

        for gi in goodinds:
            if gi == 0:  # index 0 is always background, ignore!
                continue

            inds = np.where(labels == gi) # position of cloud

            # cut a box for every single blob (cloud) from msg - get min max lat lon of the blob, cut upper lower from TRMM to match blob
            latmax, latmin = lat1[inds].max(), lat1[inds].min()
            lonmax, lonmin = lon1[inds].max(), lon1[inds].min()
            mmeans = np.percentile(mdic['t'].values[inds], 90)
            td = t.get_ddata(_y, _m, _d, _h, _mi, cut=[latmin - 1, latmax + 1]) # for each cloud, cut TRMM swath

            dt0 = dm[ind]

            ml0 = m.get_data(llbox=[lonmin - 1, latmin - 1, lonmax + 1, latmax + 1]) # cut cloud box in MSG
            if not ml0:
                continue

            #make salem grid
            grid = u_grid.make(ml0['lon'].values, ml0['lat'].values,5000)  # 5km regular grid from lat/lon coords
            lon, lat = grid.ll_coordinates # 5km grid lat/lon coordinates

            # interpolate TRMM and MSG to 5km common grid
            inter, mpoints = u_grid.griddata_input(ml0['lon'].values, ml0['lat'].values,grid)
            inter, tpoints = u_grid.griddata_input(td['lon'].values, td['lat'].values, grid)

            # Interpolate TRMM using delaunay triangularization
            try:
                dummyt = griddata(tpoints, td['p'].values.flatten(), inter, method='linear')
            except ValueError:
                continue
            outt = dummyt.reshape((grid.ny, grid.nx))

            if np.sum(np.isfinite(outt)) < 5:  # at least 5 valid pixel
                print('Kickout: TRMM min pixel  < 5')
                continue

            # Interpolate TRMM flags USING NEAREST
            dummyf = griddata(tpoints, td['flags'].values.flatten(), inter, method='nearest')
            outf = dummyf.reshape((grid.ny, grid.nx))
            outf = outf.astype(float)  # np.float was removed in NumPy 1.24; use the builtin
            isnot = np.isnan(outt)
            outf[isnot] = np.nan

            ##remove artefact edges of interpolated TRMM
            for nb in range(5):
                boole = np.isnan(outt)
                outt[boole] = -1000
                grad = np.gradient(outt)
                outt[boole] = np.nan
                outt[abs(grad[1]) > 300] = np.nan
                outt[abs(grad[0]) > 300] = np.nan
                outf[abs(grad[1]) > 300] = np.nan
                outf[abs(grad[0]) > 300] = np.nan

            #get convective rainfall only
            outff = tm_utils.getTRMMconv(outf) ## from TRMM flags, get positions of convective rain
            outk = np.zeros_like(outt)
            outk[np.where(outff)]=outt[np.where(outff)]

            # Interpolate MSG using delaunay triangularization
            dummy = griddata(mpoints, ml0['t'].values.flatten(), inter, method='linear')
            dummy = dummy.reshape((grid.ny, grid.nx))
            outl = np.full_like(dummy, np.nan)
            xl, yl = grid.transform(lon1[inds], lat1[inds], crs=salem.wgs84, nearest=True, maskout=True)
            outl[yl.compressed(), xl.compressed()] = dummy[yl.compressed(), xl.compressed()]

            # TODO #### SHIFTING WITH RESPECT TO MIN T / MAX P - search for Pmax within 20km from Tmin, shift TRMM image
            #
            # tmin = np.argmin(outl)
            # pmax =
            #
            # dist =
            #

            tmask = np.isfinite(outt)
            mmask = np.isfinite(outl)
            mask2 = np.isfinite(outl[tmask])

            #last check for min area, crazy rainfall or crazy cloud size
            if (sum(mmask.flatten())*25 < 350) or (outt.max()>200) or (sum(mmask.flatten())*25 > 1500000):
                continue

            if sum(mask2.flatten()) < 5:  # Check minimum overlap between TRMM swath and MSG cloud
                print('Kickout: TRMM MSG overlap less than 5 pixels of cloud area')
                continue

            print('Hit:', gi)

            da = xr.Dataset({'p': (['x', 'y'], outt),  # rainfall field
                             'pconv': (['x', 'y'], outk), # convective rainfall
                             't_lag0': (['x', 'y'], dummy), # full T image in cutout region
                             'tc_lag0': (['x', 'y'], outl), # cloud area only
                             },
                            coords={'lon': (['x', 'y'], lon),
                                    'lat': (['x', 'y'], lat),
                                    'time': date})
            da.attrs['lag0'] = dt0  # lag in minutes between TRMM / MSG
            da.attrs['meanT'] = np.mean(outl[mmask])  # cloud mean T
            da.attrs['T90perc'] = mmeans # cloud 90perc T
            da.attrs['meanT_cut'] = np.mean(outl[tmask][mask2]) # cloud mean T in TRMM region
            da.attrs['area'] = sum(mmask.flatten()) # total cloud area
            da.attrs['area_cut'] = sum(mask2)  # cloud area overlapping with TRMM
            da.close()
            savefile = '/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E_zR/' + date.strftime('%Y-%m-%d_%H:%M:%S') + '_' + str(gi) + '.nc'
            try:
                os.remove(savefile)
            except OSError:
                pass
            da.to_netcdf(path=savefile, mode='w')
            print('Saved ' + savefile)

            cnt = cnt + 1

    print('Saved ' + str(cnt) + ' TRMM/MSG merged MCSs as netcdf.')
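
The cloud filter above is connected-component labelling of the thresholded T field followed by a per-label pixel count; label 0 is the background and is always skipped. A toy sketch of that size cut:

import numpy as np
from scipy.ndimage import label

t = np.zeros((10, 10))
t[1:3, 1:3] = -45                    # small cold blob (4 px)
t[5:9, 4:9] = -45                    # large cold blob (20 px)
t[t >= -40] = 0                      # threshold: keep only cold cloud pixels

labels, numL = label(t)              # connected components of nonzero pixels
u, inv = np.unique(labels, return_inverse=True)
n = np.bincount(inv)                 # pixel count per label
goodinds = u[n > 10]                 # labels above the size threshold
goodinds = goodinds[goodinds != 0]   # drop the background label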
Example #13
def run(dataset, CLOBBER=False):

    for yy in range((filepath[dataset])[2][0],((filepath[dataset])[2][1])+1):   # (2004,2016)

        for mm in (filepath[dataset])[1]:


            tag = dataset[0:3].upper()

            path =  '/prj/vera/cores/' # cnst.network_data + 'MCSfiles/VERA_blobs/'
            savefile = path + 'coresPower_'+tag.upper()+'_-40_700km2_-50points_dominant_'+str(yy) + '_'+str(mm).zfill(2)+'.nc'#'blobMap_-40-700km2_-50-points_dominant_'+str(yy) + '_'+str(mm).zfill(2)+'.nc'

            if not CLOBBER:
                if os.path.isfile(savefile):
                    print('File exists, continue!')
                    continue

            pool = multiprocessing.Pool(processes=3)
            print('Reading '+filepath[dataset][0])
            meteosat_folder = (filepath[dataset])[0]

            if tag == 'MFG':
                m = mfg.ReadMfg(meteosat_folder, y1=yy, y2=yy, months=[mm])
            if tag == 'MSG':
                m = msg.ReadMsg(meteosat_folder, y1=yy, y2=yy, months=[mm])

            files  = m.fpath

            gridll = pkl.load( open (cnst.network_data + 'data/OBS/saves/VERA_msg_latlon_18W12E_1N17N.p', 'rb'))

            mdic = m.read_data(files[0], llbox=[-25, 20, 2, 25])  #[-14, 2.5, 4, 11.5]

            # make salem grid
            grid = u_grid.make(gridll['lon'].values, gridll['lat'].values, 5000)
            inds, weights, shape = u_int.interpolation_weights_grid(mdic['lon'].values, mdic['lat'].values, grid)
            gridd = (inds,weights,shape, grid)

            files_str = []

            for f in files:
                if tag == 'MSG':
                    files_str.append(f[0:-4])
                if tag == 'MFG':
                    files_str.append(f)

            files_str = np.unique(files_str)

            passit = []
            for f in files_str:
                passit.append((gridd,m, f,tag))

            res = pool.map(file_loop, passit)

            # res=[]
            # for l in passit:
            #
            #     res.append(file_loop(l))

            pool.close()

            res = [x for x in res if x is not None]

            try:
                ds = xr.concat(res, 'time')
            except ValueError:
                return

            try:
                os.remove(savefile)
            except OSError:
                pass
            #da.name = 'blob'
            #enc = {'blob': {'complevel': 5, 'zlib': True}}

            comp = dict(zlib=True, complevel=5)
            enc = {var: comp for var in ds.data_vars}

            ds.to_netcdf(path=savefile, mode='w', encoding=enc, format='NETCDF4')
            print('Saved ' + savefile)
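
The CLOBBER flag makes reruns idempotent: existing monthly files are skipped unless overwriting is explicitly requested. The guard in isolation, with a hypothetical output path:

import os

savefile = '/tmp/cores_example.nc'   # hypothetical output path
CLOBBER = False
if not CLOBBER and os.path.isfile(savefile):
    print('File exists, continue!')  # skip this month on rerun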
Example #14
def create_map_data():
    # read only trmm files that I need and give out proper lons lats etc
    files = "/users/global/cornkle/data/OBS/TRMM/trmm_swaths_WA/2011/06/2A25.20110612.77322.7.gra"

     # /2011/06/2A25.20110612.77322.7.gra"  good to show
    trr = np.fromfile(files,dtype=np.int16)
    x = 49
    nb = trr.size
    single = int(nb/4) # variables lon lat rainrate flag

    lons = trr[0:single]
    lats = trr[single:2*single]
    rainrs = trr[2*single:3*single]
    flags = trr[3*single:4*single]

    y = int(lons.size/x)
    lons = np.resize(lons, (y,x))
    lats = np.resize(lats, (y,x))
    rainrs = np.resize(rainrs, (y,x))
    flags = np.resize(flags, (y,x))
    lon = lons / 100.
    lat = lats / 100.
    rainr = rainrs / 10.
    lonmin, lonmax = np.amin(lon), np.amax(lon)
    latmin, latmax = np.amin(lat), np.amax(lat)
    lonx = lon[0, :]
    laty = lat[:, 0]
    rainrs.shape

    path = "/users/global/cornkle/data/OBS/meteosat_WA30/cell_blob_files/2011/06/"  # 201106122130 good to show
    filename = "201106122130.gra"
    files = path + filename
    rrShape = (580,1640)
    rrMDI = np.uint16()
    rr = np.fromfile(files,dtype=rrMDI.dtype)
    rr.shape = rrShape

    path = "/users/global/cornkle/data/OBS/meteosat_WA30/msg_raw_binary/2011/06/"  # 201106122130 good to show
    filename = "201106122130.gra"
    files = path + filename
    rrMDI = np.uint8(255)
    rr2 = np.fromfile(files, dtype=rrMDI.dtype)
    rr2.shape = rrShape
    rr2 = rr2.astype(np.int32) - 173

    msg_latlon=np.load('/users/global/cornkle/data/OBS/meteosat_WA30/MSG_1640_580_lat_lon.npz')
    mlon = msg_latlon['lon']
    mlat = msg_latlon['lat']

    # make salem grid
    grid = u_grid.make(mlon, mlat, 5000)
    xi, yi = grid.ij_coordinates
    glon, glat = grid.ll_coordinates

    # Transform lons, lats to grid
    xt, yt = grid.transform(lon.flatten(), lat.flatten(), crs=salem.wgs84)

    # Convert for griddata input
    tpoints = np.array((yt, xt)).T
    inter = np.array((np.ravel(yi), np.ravel(xi))).T

    # Interpolate using delaunay triangularization
    dummyt = griddata(tpoints, rainrs.flatten(), inter, method='linear')
    outt = dummyt.reshape((grid.ny, grid.nx))

    for nb in range(5):
        boole = np.isnan(outt)
        outt[boole] = -1000
        grad = np.gradient(outt)
        outt[boole] = np.nan
        outt[abs(grad[1]) > 300] = np.nan
        outt[abs(grad[0]) > 300] = np.nan

    xm, ym = grid.transform(mlon.flatten(), mlat.flatten(), crs=salem.wgs84)
    mpoints = np.array((ym, xm)).T
    out = griddata(mpoints, rr2.flatten(), inter, method='linear')
    outp = out.reshape((grid.ny, grid.nx))

    out = griddata(mpoints, rr.flatten(), inter, method='nearest')
    outb = out.reshape((grid.ny, grid.nx))

    data = xr.Dataset({'trmm': (['lat', 'lon'], outt),
                       'tir': (['lat', 'lon'], outp),
                       'tblob' : (['lat', 'lon'], outb)},
             coords={ 'lat': glat[:,0], 'lon':glon[0,:]}) #[np.newaxis, :]

    data.to_netcdf('/users/global/cornkle/C_paper/wavelet/saves/maps/trmm_msg_map.nc')
Example #15
def saveMCS_WA15():
    trmm_folder = "/users/global/cornkle/data/OBS/TRMM/trmm_swaths_WA/"
    msg_folder = '/users/global/cornkle/data/OBS/meteosat_tropWA' #meteosat_WA30'

    t = trmm_clover.ReadWA(trmm_folder, yrange=YRANGE, area=[-13, 13, 4, 8])   # [-15, 15, 4, 21], [-10, 10, 10, 20]
    m = msg.ReadMsg(msg_folder)

    cnt = 0

    # define the "0 lag" first
    arr = np.array([15, 30, 45, 60, 0])
    #mon = [3,4,5] # months march april may only
    # cycle through TRMM dates - only dates that have a certain number of pixels in llbox are considered
    for _y, _m, _d, _h, _mi in zip(t.dates.dt.year,  t.dates.dt.month, t.dates.dt.day, t.dates.dt.hour, t.dates.dt.minute):

        if (_h <10) | (_h>21):
            continue

        if (_m<9) | (_m>10):
            continue

        date = dt.datetime(_y, _m, _d, _h, _mi)

        tdic = t.get_ddata(date, cut=[4, 8])


        #get closest minute
        dm = arr - _mi
        dm = dm[dm<0]
        try:
            ind = (np.abs(dm)).argmin()
        except ValueError:
            continue

        # set zero shift time for msg


        dt0 = dm[ind]
        ndate = date + dt.timedelta(minutes=int(dt0))
        m.set_date(ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute)

        mdic = m.get_data(llbox=[tdic['lon'].values.min(),  tdic['lon'].values.max(), tdic['lat'].values.min(),tdic['lat'].values.max()])

        # check whether date is completely missing or just 30mins interval exists
        # if str(date) == '2004-05-02 13:15:00':
        #     pdb.set_trace()
        if not mdic:
            dm = np.delete(dm, np.argmin(np.abs(dm)), axis=0)
            try:
                dummy = np.min(np.abs(dm))> 15
            except ValueError:
                continue
            if dummy:
                print('Date missing')
                continue
            ind = (np.abs(dm)).argmin()
            dt0 = dm[ind]
            ndate = date + dt.timedelta(minutes=int(dt0))
            m.set_date(ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute)
            mdic = m.get_data(llbox=[tdic['lon'].values.min(),  tdic['lon'].values.max(), tdic['lat'].values.min(),tdic['lat'].values.max()])

            if not mdic:
                print('Date missing')
                continue

        print('TRMM:', date, 'MSG:', ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute )

        lon1 = mdic['lon'].values
        lat1 = mdic['lat'].values
        mdic['t'].values[mdic['t'].values >= -40] = 0  # T threshold -40
        labels, numL = label(mdic['t'].values)

        u, inv = np.unique(labels, return_inverse=True)
        n = np.bincount(inv)

        goodinds = u[n > 556]  # minimum MCS size: 5000 km2 = 556 pixels at ~3x3 km resolution (350 km2 would be 39 pixels)
        print(goodinds)
        if not sum(goodinds) > 0:
            continue

        for gi in goodinds:
            if gi == 0:  # index 0 is always background, ignore!
                continue

            inds = np.where(labels == gi)

            # cut a box for every single blob from msg - get min max lat lon of the blob, cut upper lower from TRMM to match blob
            latmax, latmin = mdic['lat'].values[inds].max(), mdic['lat'].values[inds].min()
            lonmax, lonmin = mdic['lon'].values[inds].max(), mdic['lon'].values[inds].min()
            mmeans = np.percentile(mdic['t'].values[inds], 90)
            td = t.get_ddata(date, cut=[latmin - 1, latmax + 1])

            # ensure minimum trmm rainfall in area
            # if len(np.where(td['p'].values > 0.1)[0]) < 1:  # at least 1 pixel with rainfall
            #     print('Kickout: TRMM min pixel < 1')
            #     continue

            dt0 = dm[ind]
            ndate = date + dt.timedelta(minutes=int(dt0))

            # if (ndate.year, ndate.month, ndate.day, ndate.hour, ndate.minute) == (2006, 6, 6, 5, 0):
            #     ipdb.set_trace()

            ml0 = m.get_data(llbox=[lonmin - 1,  lonmax + 1, latmin - 1, latmax + 1])
            if not ml0:
                continue

            #make salem grid
            grid = u_grid.make(ml0['lon'].values, ml0['lat'].values,5000)
            lon, lat = grid.ll_coordinates

            # interpolate TRM and MSG to salem grid
            inter, mpoints = u_grid.griddata_input(ml0['lon'].values, ml0['lat'].values,grid)
            inter, tpoints = u_grid.griddata_input(td['lon'].values, td['lat'].values, grid)

            # Interpolate TRMM using delaunay triangularization
            try:
                dummyt = griddata(tpoints, td['p'].values.flatten(), inter, method='linear')
            except ValueError:
                continue
            outt = dummyt.reshape((grid.ny, grid.nx))
            # if len(np.where(outt > 0.1)[0]) < 2:  # at least 2 pixel with rainfall
            #     print('Kickout: TRMM wavelet min pixel pcp < 2')
            #     continue

            if np.sum(np.isfinite(outt)) < 5:  # require at least 5 valid pixels
                print('Kickout: TRMM valid pixels < 5')
                continue

            # Interpolate TRMM flags using nearest
            dummyf = griddata(tpoints, td['flags'].values.flatten(), inter, method='nearest')
            outf = dummyf.reshape((grid.ny, grid.nx))
            outf = outf.astype(float)  # np.float was removed in NumPy 1.24; use the builtin
            isnot = np.isnan(outt)
            outf[isnot] = np.nan

            ##remove edges of interpolated TRMM
            for nb in range(5):
                boole = np.isnan(outt)
                outt[boole] = -1000
                grad = np.gradient(outt)
                outt[boole] = np.nan
                outt[abs(grad[1]) > 300] = np.nan
                outt[abs(grad[0]) > 300] = np.nan
                outf[abs(grad[1]) > 300] = np.nan
                outf[abs(grad[0]) > 300] = np.nan

            #get convective rainfall only
            outff = tm_utils.getTRMMconv(outf)
            outk = outt.copy()*0
            outk[np.where(outff)]=outt[np.where(outff)]


            # Interpolate MSG using delaunay triangularization
            dummy = griddata(mpoints, ml0['t'].values.flatten(), inter, method='linear')
            dummy = dummy.reshape((grid.ny, grid.nx))
            outl = np.full_like(dummy, np.nan)
            xl, yl = grid.transform(lon1[inds], lat1[inds], crs=salem.wgs84, nearest=True, maskout=True)
            outl[yl.compressed(), xl.compressed()] = dummy[yl.compressed(), xl.compressed()]

            # #### SHIFTING WITH RESPECT TO MIN T / MAX P - search for Pmax within 20km from Tmin, shift TRMM image
            #
            # tmin = np.argmin(outl)
            # pmax =
            #
            # dist =
            #

            tmask = np.isfinite(outt)
            mmask = np.isfinite(outl)
            mask2 = np.isfinite(outl[tmask])

            if (sum(mmask.flatten())*25 < 350) | (outt.max()>200):# or (sum(mmask.flatten())*25 > 1500000): #or (outt.max()<0.1)
                continue

            if sum(mask2.flatten()) < 5:  # sum(mmask.flatten())*0.3:
                print('Kickout: TRMM MSG overlap less than 5 pixels of cloud area')
                continue

            print('Hit:', gi)

            da = xr.Dataset({'p': (['x', 'y'], outt),
                             'pconv': (['x', 'y'], outk),
                             't_lag0': (['x', 'y'], dummy),
                             'tc_lag0': (['x', 'y'], outl),
                             },
                            coords={'lon': (['x', 'y'], lon),
                                    'lat': (['x', 'y'], lat),
                                    'time': date})
            da.attrs['lag0'] = dt0
            da.attrs['meanT'] = np.mean(outl[mmask])
            da.attrs['T90perc'] = mmeans
            da.attrs['meanT_cut'] = np.mean(outl[tmask][mask2])
            da.attrs['area'] = sum(mmask.flatten())
            da.attrs['area_cut'] = sum(mask2)
            da.close()
            savefile = '/users/global/cornkle/MCSfiles/WA5000_4-8N_13W-13E_-40_18UTC/' + date.strftime('%Y-%m-%d_%H:%M:%S') + '_' + str(gi) + '.nc'
            try:
                os.remove(savefile)
            except OSError:
                pass
            da.to_netcdf(path=savefile, mode='w')
            print('Saved ' + savefile)

            cnt = cnt + 1

    print('Saved ' + str(cnt) + ' MCSs as netcdf.')