Example No. 1
def test():
    """ Test Dataset functionality

    >>> data = test() 
    >>> data['test2'] = da.DimArray([0,3],('source',['greenland','antarctica'])) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
        ...
    ValueError: axes values do not match, align data first.                            
    Dataset: source(1)=greenland:greenland, 
    Got: source(2)=greenland:antarctica
    >>> data['ts']
    dimarray: 5 non-null elements (5 null)
    dimensions: 'time'
    0 / time (10): 1950 to 1959
    array([  0.,   1.,   2.,   3.,   4.,  nan,  nan,  nan,  nan,  nan])
    >>> data.to_array(axis='items')
    dimarray: 12250 non-null elements (1750 null)
    dimensions: 'items', 'lon', 'lat', 'time', 'source'
    0 / items (4): mymap to test
    1 / lon (50): -180.0 to 180.0
    2 / lat (7): -90.0 to 90.0
    3 / time (10): 1950 to 1959
    4 / source (1): greenland to greenland
    array(...)
    """
    import numpy as np
    import dimarray as da
    axes = da.Axes.from_tuples(('time', [1, 2, 3]))
    ds = da.Dataset()
    a = da.DimArray([[0, 1], [2, 3]], dims=('time', 'items'))
    ds['yo'] = a.reindex_like(axes)

    np.random.seed(0)
    mymap = da.DimArray.from_kw(np.random.randn(50, 7),
                                lon=np.linspace(-180, 180, 50),
                                lat=np.linspace(-90, 90, 7))
    ts = da.DimArray(np.arange(5), ('time', np.arange(1950, 1955)))
    ts2 = da.DimArray(np.arange(10), ('time', np.arange(1950, 1960)))

    # Define a Dataset made of several variables
    data = da.Dataset({'ts': ts, 'ts2': ts2, 'mymap': mymap})
    #data = da.Dataset([ts, ts2, mymap], keys=['ts','ts2','mymap'])

    assert np.all(
        data['ts'].time == data['ts2'].time), "Dataset: pb data alignment"

    data['test'] = da.DimArray([0], ('source', ['greenland']))  # new axis
    #data

    return data
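The doctest above shows that assigning a DimArray whose axis values differ from those already in the Dataset raises a ValueError. A minimal sketch (reusing the reindex_like call already shown in this example) of aligning the new variable on the existing 'source' axis before assigning it:

import dimarray as da

data = test()
new = da.DimArray([0, 3], ('source', ['greenland', 'antarctica']))
# reindex onto the axes of an existing Dataset entry so that the 'source'
# axis values match and the assignment no longer raises ValueError
data['test2'] = new.reindex_like(data['test'])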
Example No. 2
def my_read_vcm(f=f, verbose=False):
    filename = f
    lon = da.read_nc(filename, 'lon', verbose=verbose).values
    lat = da.read_nc(filename, 'lat', verbose=verbose).values
    data = da.Dataset()
    data['cal333'] = da.read_nc(filename, 'cal333', verbose=verbose)
    altitude = data['cal333'].altitude
Example No. 3
def compound_precip_temp_index(combinations, out_file):
    """
	Combines binary state variables read from several NetCDF files into compound (logical AND) indices and writes them to `out_file`
	"""

    out = {}

    for name, conditions in combinations.items():
        conds = []
        description = []
        for condition in conditions:
            nc = da.read_nc(condition[0])
            conds.append(nc[condition[1]].squeeze())
            description.append(nc[condition[1]].description)

        compound_state = conds[0].copy()
        compound_state[:] = False
        for cond in conds:
            compound_state += cond

        compound_state /= len(conds)
        out[name] = da.DimArray(np.array(compound_state.values, dtype=np.byte),
                                axes=compound_state.axes,
                                dims=compound_state.dims,
                                dtype=np.byte)
        out[name].description = ' AND '.join(description)

    da.Dataset(out).write_nc(out_file)
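A hedged usage sketch of the `combinations` argument: the file and variable names below are hypothetical and only illustrate that each entry maps a compound-index name to a list of (state_file, state_variable) conditions that are combined with a logical AND.

# hypothetical state files and variable names, shown only to illustrate the call
combinations = {
    'dry_and_cold': [
        ('pr_states.nc', 'dry'),
        ('tas_states.nc', 'cold'),
    ],
}
compound_precip_temp_index(combinations, 'compound_dry_cold.nc')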
Example No. 4
def csat_no_cal(files, outname, where):

    v = vcm.VCM(files, verbose=False)
    cal = v.get_vcm('cal333+cal05+cal20+cal80')
    csat = v.get_vcm('csat')

    calcloudy = np.sum(cal, axis=1)
    csatcloudy = np.sum(csat, axis=1)

    csatonly = np.where((csatcloudy > 0) & (calcloudy == 0))[0]
    print(100. * csatonly.shape[0] / calcloudy.shape[0])

    vcm_csatonly = np.zeros_like(csat)
    vcm_csatonly[csatonly, :] = csat[csatonly, :]
    csatcloudy[calcloudy > 0] = 0

    dset = da.Dataset()
    time_axis = da.Axis(v.time, 'tai_time')
    alt_axis = da.Axis(v.altitude, 'altitude')
    dset['lon'] = da.DimArray(v.lon, [time_axis])
    dset['lat'] = da.DimArray(v.lat, [time_axis])
    dset['csat'] = da.DimArray(vcm_csatonly, [time_axis, alt_axis])
    dset['cloudpts'] = da.DimArray(csatcloudy, [time_axis])

    check_out_dir(where)

    dset.write_nc(where + outname, 'w', zlib=True, complevel=9)
Example No. 5
def vcm_dataset_from_l2_orbit(filename):
    
    #print 'Creating vcm dataset from l2 file ' + filename
    
    l2 = calipso_l2.Cal2(filename)
    lon, lat = l2.coords()
    tai_time = l2.time()
    nl, base, top = l2.layers()
    havg = l2.horizontal_averaging()
    ltype = l2.layer_type()
    tai_time_min, tai_time_max = l2.time_bounds()
    tropo = l2.tropopause_height()
    elevation = l2.dem_surface_elevation()[:,1]
    l2.close()
    
    tropo[lat < -60] = 11.
    tropo[lat > 60] = 11.
    
    dset = da.Dataset()
    
    time_axis = da.Axis(tai_time, 'tai_time')
    alt_axis = da.Axis(vcm_alt, 'altitude')
    
    for havg_vcm in havgs_vcm:
        vcm = vcm_from_layers(nl, base, top, havg, ltype, tropo, only_havg=havg_vcm)
        vcm_name = 'cal%02d' % (havg_vcm)
        dset[vcm_name] = da.DimArray(vcm, [time_axis, alt_axis])
    
    dset['lon'] = da.DimArray(lon, [time_axis])
    dset['lat'] = da.DimArray(lat, [time_axis])
    dset['time_min'] = da.DimArray(tai_time_min, [time_axis])
    dset['time_max'] = da.DimArray(tai_time_max, [time_axis])
    dset['elevation'] = da.DimArray(elevation, [time_axis])

    return dset
Example No. 6
    def gather_info_track(self, overwrite=False):
        out_file = self._working_dir + 'surrounding_info.nc'
        if overwrite and os.path.isfile(out_file):
            os.system('rm ' + out_file)
        elif overwrite == False and os.path.isfile(out_file):
            self._track_info = da.read_nc(out_file)
            return self._track_info

        track_info = {}
        for id_, track in self._tcs.items():
            track = track[np.isfinite(track[:, 't']), :]
            info = np.zeros([
                6, track.shape[0], self._win2 * 2 + 1, self._win2 * 2 + 1
            ]) * np.nan
            for i, p in enumerate(track.values.tolist()):
                box_2 = [
                    int(bb) for bb in self.get_box(p[1], p[2], self._win2)
                ]
                info[0, i,
                     abs(p[1] - box_2[0] - 12):box_2[1] - p[1] + 12,
                     abs(p[2] - box_2[2] - 12):box_2[3] - p[2] +
                     12] = self._VO[int(p[0]), box_2[0]:box_2[1],
                                    box_2[2]:box_2[3]]
                info[1, i,
                     abs(p[1] - box_2[0] - 12):box_2[1] - p[1] + 12,
                     abs(p[2] - box_2[2] - 12):box_2[3] - p[2] +
                     12] = self._MSLP[int(p[0]), box_2[0]:box_2[1],
                                      box_2[2]:box_2[3]]
                info[2, i,
                     abs(p[1] - box_2[0] - 12):box_2[1] - p[1] + 12,
                     abs(p[2] - box_2[2] - 12):box_2[3] - p[2] +
                     12] = self._Wind10[int(p[0]), box_2[0]:box_2[1],
                                        box_2[2]:box_2[3]]
                if self._SST is not None:
                    info[3, i,
                         abs(p[1] - box_2[0] - 12):box_2[1] - p[1] + 12,
                         abs(p[2] - box_2[2] - 12):box_2[3] - p[2] +
                         12] = self._SST[int(p[0]), box_2[0]:box_2[1],
                                         box_2[2]:box_2[3]]
                if self._T is not None:
                    info[4, i,
                         abs(p[1] - box_2[0] - 12):box_2[1] - p[1] + 12,
                         abs(p[2] - box_2[2] - 12):box_2[3] - p[2] +
                         12] = self._T[int(p[0]), 0, box_2[0]:box_2[1],
                                       box_2[2]:box_2[3]]
                    info[5, i,
                         abs(p[1] - box_2[0] - 12):box_2[1] - p[1] + 12,
                         abs(p[2] - box_2[2] - 12):box_2[3] - p[2] +
                         12] = self._T[int(p[0]), 1, box_2[0]:box_2[1],
                                       box_2[2]:box_2[3]]
            track_info[str(id_)] = da.DimArray(
                info,
                axes=[['VO', 'MSLP', 'Wind10', 'SST', 'T850', 'T500'],
                      range(len(track.time)),
                      range(self._win2 * 2 + 1),
                      range(self._win2 * 2 + 1)],
                dims=['variable', 'time_id', 'lat', 'lon'])
        self._track_info = da.Dataset(track_info)
        self._track_info.write_nc(out_file, mode='w')
Example No. 7
def precip_to_index(
    in_file,
    out_file,
    var_name='pr',
    unit_multiplier=1,
    states={
        'dry': {
            'mod': 'below',
            'threshold': 1
        },
        'wet': {
            'mod': 'above',
            'threshold': 1
        },
        '5mm': {
            'mod': 'above',
            'threshold': 5
        },
        '10mm': {
            'mod': 'above',
            'threshold': 10
        }
    }):
    """
	Classifies daily precipitation into states such as 'wet' and 'dry' days based on per-state thresholds

	Parameters
	----------
		in_file: str
			filepath of a daily precipitation file. The variable that is read in can be specified with `var_name`.
		out_file: str
			filepath of the state file that is written
		var_name: str
			name of the variable read from `in_file`
		unit_multiplier: float, default=1
			factor by which daily precipitation is multiplied to obtain mm as units
		states: dict
			dictionary mapping each state name to a `mod` ('above' or 'below') and a `threshold` in mm; each state is written to `out_file` as a byte mask
	"""
    nc = da.read_nc(in_file)
    pr = nc[var_name].squeeze() * unit_multiplier

    out = {}
    for name, state_dict in states.items():
        state = nc[var_name].squeeze().copy()
        state[:] = False
        if state_dict['mod'] == 'above':
            state[pr >= state_dict['threshold']] = True
        if state_dict['mod'] == 'below':
            state[pr <= state_dict['threshold']] = True
        out[name] = da.DimArray(np.array(state.values, dtype=np.byte),
                                axes=state.axes,
                                dims=state.dims,
                                dtype=np.byte)
        out[name].description = 'days with precipitation ' + state_dict[
            'mod'] + ' ' + str(state_dict['threshold']) + 'mm'
    da.Dataset(out).write_nc(out_file)
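A minimal call sketch with hypothetical file paths; with the default `states`, each day is classified as dry or wet and additionally flagged when precipitation exceeds 5 mm or 10 mm.

# hypothetical paths; unit_multiplier=86400 would convert kg m-2 s-1 to mm per day
precip_to_index('pr_daily.nc', 'pr_states.nc', var_name='pr', unit_multiplier=86400)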
Example No. 8
def reindex_vcms(vcm, vcm5, vcmc):

    # vcmc can be None

    mintime5, maxtime5 = vcm5['time_min'].values, vcm5['time_max'].values

    nprof5 = mintime5.shape[0]
    time333 = vcm['time']
    nprof333 = time333.shape[0]

    # first find 333m profiles indexes for a given 5km profile
    n1, n2 = np.zeros(nprof5, 'int16'), np.zeros(nprof5, 'int16')
    n = 0
    for i in np.r_[0:nprof5]:
        n1[i] = n
        while n < nprof333 and time333[n] < maxtime5[i]:
            n += 1
        n2[i] = n
        # print 'Range for profile5 %d = %d - %d' % (i, n1[i], n2[i])

    # reindex all flags on the same 333m coordinates

    time_axis = da.Axis(vcm['time'], 'tai_time')
    alt_axis = da.Axis(vcm['altitude'], 'altitude')
    reindexed = da.Dataset()
    reindexed['lon'] = da.DimArray(vcm['lon'], [
        time_axis,
    ])
    reindexed['lat'] = da.DimArray(vcm['lat'], [
        time_axis,
    ])
    reindexed['cal333'] = da.DimArray(vcm['cal333'], [time_axis, alt_axis])

    # remap CALIPSO flag
    for vcm_name in 'cal05', 'cal20', 'cal80':
        this_vcm = remap_profiles(vcm5[vcm_name].values, n1, n2, nprof333)
        reindexed[vcm_name] = da.DimArray(this_vcm,
                                          labels=reindexed['cal333'].labels,
                                          dims=reindexed['cal333'].dims)

    # remap cloudsat flag
    if vcmc is None:
        this_vcm = np.ones_like(vcm['cal333'], 'int8') * -1.
    else:
        vcmc.values = remove_ground_clutter(vcmc.values,
                                            vcm5['elevation'].values,
                                            vcm5['cal05'].altitude)
        this_vcm = remap_profiles(vcmc.values, n1, n2, nprof333)
    reindexed['csat'] = da.DimArray(this_vcm,
                                    labels=reindexed['cal333'].labels,
                                    dims=reindexed['cal333'].dims)

    # Now we have
    # cal333, cal05, cal20, cal80, csat in reindexed.

    return reindexed
Example No. 9
    def detect_dieng(self,
                     overwrite=False,
                     dis_VO_max=8,
                     min_number_cells=6,
                     thr_VO=1 * 10**(-5),
                     thr_RH=50):
        out_file = self._working_dir + str(
            self._identifier) + '_detected_positions.nc'
        if overwrite and os.path.isfile(out_file):
            os.system('rm ' + out_file)
        elif overwrite == False and os.path.isfile(out_file):
            self._detected = da.read_nc(out_file)['detected']
            return self._detected

        # convert distances from degrees into grid-cells
        dis_VO_max = self.degree_to_step(dis_VO_max)

        detect = np.array([[np.nan] * 6])
        print('detecting\n10------50-------100')
        for t, progress in zip(
                self._time_i,
                np.array([['-'] + [''] * (len(self._time_i) // 20 + 1)] *
                         20).flatten()[0:len(self._time_i)]):
            sys.stdout.write(progress)
            sys.stdout.flush()
            coords = peak_local_max(self._VO[t, :, :],
                                    min_distance=int(dis_VO_max))
            #print(coords)
            for y_, x_ in zip(coords[:, 0], coords[:, 1]):
                if self._VO[t, y_, x_] > thr_VO:
                    yy, xx = self.find_group(field=self._VO[t, :, :],
                                             y=y_,
                                             x=x_,
                                             thresh=thr_VO)
                    if len(yy) >= min_number_cells:
                        if self._RH[t, y_,
                                    x_] >= thr_RH and self._lat[y_] < 35:
                            #y_,x_ = sum(yy) / len(yy), sum(xx) / len(yy)
                            tmp = [
                                t, y_, x_, self._VO[t, y_, x_], self._RH[t, y_,
                                                                         x_],
                                len(yy)
                            ]
                            detect = np.concatenate((detect, np.array([tmp])))

        self._detected = da.DimArray(
            np.array(detect[1:, :]),
            axes=[
                range(detect.shape[0] - 1),
                ['t', 'y', 'x', 'VO', 'RH', 'members']
            ],
            dims=['ID', 'z'])
        da.Dataset({'detected': self._detected}).write_nc(out_file, mode='w')
        print('\ndone')
        return self._detected
Example No. 10
def create_nc_monthly_variable_1d(exp, variable, fname, dim_name):
    months = np.arange(len(exp))

    slr_po = da.DimArray(exp.tolist(), axes=[months], dims=['months'])
    month = da.DimArray(months.tolist(), axes=[months], dims=['months'])
    #     month_co=da.DimArray(mons_cons.tolist(),axes = [mons_cons],dims = ['months'])
    #     year_date=da.DimArray(years_mon.tolist(),axes = [mons_cons],dims = ['months'])
    #     dim_name = my_vari_name(variable)
    dataset = da.Dataset({variable: slr_po})
    #     print(clu+fname+dim_name)
    dataset.write_nc(clu_path + fname + dim_name)
Example No. 11
    def from_memory(self, id=None):
        """ retrieve glacier state variable from fortran code
        """
        if id is not None:
            wrapper.associate_glacier(id)

        ds = da.Dataset()
        for v in self._names:
            ds[v] = wrapper.get_var(v)
        ds.set_axis(wrapper.get_var('x'), name='x', inplace=True)
        gl = self.from_dataset(ds)
        gl.params = wrapper.get_nml()
        return gl
Example No. 12
    def compute_mass_balance(self, init=True):
        if init:
            self._in_memory_init(
                self.id
            )  # set an in-memory glacier that is ready for further computation
        wrapper.update_massbalance()  # compute all mass balance fluxes

        ds = da.Dataset()
        _variables = ["smb", "basalmelt", "fjordmelt", "dynmb"]  # mass balance
        for v in _variables:
            ds[v] = wrapper.get_var(v)
        ds.set_axis(wrapper.get_var('x'), name='x', inplace=True)
        return ds
Example No. 13
def temp_anomaly_to_ind_old(anom_file,
                            out_file,
                            var_name='tas',
                            seasons={
                                'MAM': [3, 4, 5],
                                'JJA': [6, 7, 8],
                                'SON': [9, 10, 11],
                                'DJF': [12, 1, 2]
                            },
                            overwrite=True):
    """
	Classifies daily temperature anomalies into 'cold' and 'warm' days using the season- and grid-cell-specific median as threshold
	Parameters
	----------
		anom_file: str
			filepath of a temperature anomalies file. The variable that is read in can be specified with `var_name`.
		out_file: str
			filepath of a state file
		var_name: str
			name of the variable read in `anom_file`
		seasons: dict, default=`{'MAM': [3, 4, 5], 'JJA': [6, 7, 8], 'SON': [9, 10, 11], 'DJF': [12, 1, 2]}`
			dictionary mapping each season name to the three month numbers belonging to it; anomalies are centered on the season-specific median
		overwrite: bool
			overwrites existing files
	"""
    nc = da.read_nc(anom_file)
    if 'calendar' in nc['time'].attrs.keys():
        datevar = num2date(nc['time'].values,
                           units=nc['time'].units,
                           calendar=nc['time'].calendar)
    else:
        datevar = num2date(nc['time'].values, units=nc['time'].units)
    month = np.array([date.month for date in datevar])

    anom = nc[var_name].squeeze()

    state = nc[var_name].squeeze().copy() * np.nan

    for season in seasons.keys():
        days_in_season = np.where((month == seasons[season][0])
                                  | (month == seasons[season][1])
                                  | (month == seasons[season][2]))[0]
        seasonal_median = np.nanmedian(anom.ix[days_in_season, :, :], axis=0)
        anom.ix[days_in_season, :, :] -= seasonal_median

    state[anom >= 0] = 1
    state[anom < 0] = -1

    if overwrite: os.system('rm ' + out_file)
    state.description = 'daily anomalies - seasonal median of daily anomalies at grid cell level. positive anomalies -> 1, negative anomalies -> -1'
    da.Dataset({'state': state}).write_nc(out_file)
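A hedged usage sketch with hypothetical file paths, relying on the default seasonal grouping; the resulting state variable holds 1 for warm days and -1 for cold days.

# hypothetical paths; the result is written to out_file under the name 'state'
temp_anomaly_to_ind_old('tas_anomalies.nc', 'tas_states.nc', var_name='tas')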
Example No. 14
    def compute_stress(self, init=True):
        """ compute stress associated with current velocity and glacier profile
        """
        if init:
            self._in_memory_init(
                self.id
            )  # set an in-memory glacier that is ready for further computation
        wrapper.compute_stress()
        ds = da.Dataset()
        _stress_variables = ["driving", "lat", "long", "basal", "residual"]
        for v in [s + '_stress' for s in _stress_variables]:
            ds[v] = wrapper.get_var(v)
        ds.set_axis(wrapper.get_var('x'), name='x', inplace=True)
        return ds
Example No. 15
    def load_path(path, variables=None, method="after"):
        """Load variables along a path

        Parameters
        ----------
        path : list of [(x0,y0), (x1, y1), ...] coordinates
        variables : list, optional
            of variables to be loaded (by default, all in a dataset)
        method : str, optional
            method to sample the data, by default "after", which indicates
            the grid point at or just after the match, as returned
            by searchsorted
        """
        if method == "after":
            pass
        elif method == "nearest":
            raise NotImplementedError()
        elif method == "linear":
            raise NotImplementedError()
        else:
            raise ValueError("Invalid method: " + method)
        xs, ys = zip(
            *path)  # [(x0, y0), (x1, ...)] into [[x0,x1..], [y0, y1..]]
        xs = np.asarray(xs)
        ys = np.asarray(ys)
        l = np.min(xs)
        r = np.max(xs)
        b = np.min(ys)
        t = np.max(ys)
        data2d = load_map_func(variables=variables, bbox=[l, r, b, t])
        # add a new coordinate s
        diff_s = np.sqrt(np.square(np.diff(xs)) + np.square(np.diff(ys)))
        s = np.concatenate(([0], np.cumsum(diff_s)))
        datapath = da.Dataset()
        # add x and y as variables in the dataset
        datapath['x'] = da.DimArray(xs, axes=[s], dims=['s'])
        datapath['x'].long_name = "x-coordinate along sample path"
        datapath['y'] = da.DimArray(ys, axes=[s], dims=['s'])
        datapath['y'].long_name = "y-coordinate along sample path"
        # now extract dataset...
        i = np.searchsorted(data2d.y, ys)
        j = np.searchsorted(data2d.x, xs)
        i[i == data2d.y.size] -= 1  # out-of-bound
        j[j == data2d.x.size] -= 1
        for v in data2d.keys():
            pathvals = data2d[v].values[i, j]
            datapath[v] = da.DimArray(pathvals, axes=[s], dims=['s'])
            datapath[v].attrs.update(data2d[v].attrs)
        return datapath
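A hedged usage sketch, assuming load_map_func returns a Dataset with 'x' and 'y' axes as used above; the path coordinates and variable names below are hypothetical.

# three hypothetical points along a transect; 'vx' and 'vy' are assumed to be
# variables available from load_map_func
path = [(-600000.0, -700000.0), (-595000.0, -702000.0), (-590000.0, -704000.0)]
transect = load_path(path, variables=['vx', 'vy'], method='after')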
Example No. 16
def cf_from_vcm_orbit(vcm_orbit, layers):

    print('Creating vcm from ', vcm_orbit)

    # read data
    v = vcm.VCM(vcm_orbit, verbose=False)

    print('%d profiles' % (v.lon.shape[0]))

    lon_axis = da.Axis(lonbins[:-1], 'lon')
    lat_axis = da.Axis(latbins[:-1], 'lat')

    # gridded number of profiles
    h, xx, yy = np.histogram2d(v.lon, v.lat, bins=(lonbins, latbins))

    out = da.Dataset()
    out['nprof'] = da.DimArray(h * 3, [lon_axis, lat_axis])
    out['nprof'].longname = 'Number of measured profiles'

    for layer in layers:

        altrange = layers[layer]
        altidx = np.where((v.altitude >= altrange[0])
                          & (v.altitude < altrange[1]))[0]

        for vcm_name in vcm_names:

            # number of cloudy profiles in grid and altitude range

            this_vcm = v.get_vcm(vcm_name)
            assert this_vcm is not None

            if layer == 'total':
                t = this_vcm
            else:
                t = np.take(this_vcm, altidx, axis=1)
            cloudy_profile = np.sum(t, axis=1)
            np.clip(cloudy_profile, 0, 3, out=cloudy_profile)

            h, xx, yy = np.histogram2d(v.lon,
                                       v.lat,
                                       bins=(lonbins, latbins),
                                       weights=cloudy_profile)
            outname = vcm_name + '_cprof_%s' % (layer)
            out[outname] = da.DimArray(h, [lon_axis, lat_axis])
            out[outname].longname = 'Number of cloudy profiles from cloud mask = ' + vcm_name + ' at altitudes %5.2f - %5.2f' % (
                altrange[0], altrange[1])

    return out
Example No. 17
def project_slr(scen, gmt, settings):

    projection_data = {}

    temp_anomaly_years = pd.read_csv(os.path.join(settings.calibfolder,
                                                  "temp_anomaly_years.csv"),
                                     index_col=[0, 1])
    temp_anomaly_years = temp_anomaly_years.where(
        pd.notnull(temp_anomaly_years), None)

    for i, contrib_name in enumerate(settings.project_these):

        print("contribution", contrib_name)

        realizations = np.arange(settings.nrealizations)
        calibdata = pd.read_csv(os.path.join(settings.calibfolder,
                                             contrib_name + ".csv"),
                                index_col=[0])

        temp_anomaly_year = temp_anomaly_years.loc[contrib_name]
        sl_contributor = cf.contributor_functions[contrib_name]

        proj = np.zeros([len(settings.proj_period), settings.nrealizations])

        for n in realizations:
            slr, gmt_n, obs_choice, params = project(gmt, settings.proj_period,
                                                     calibdata,
                                                     temp_anomaly_year,
                                                     sl_contributor, n,
                                                     contrib_name)
            proj[:, n] = slr

        pdata = da.DimArray(proj,
                            axes=[settings.proj_period, realizations],
                            dims=["time", "runnumber"])

        projection_data[contrib_name] = pdata

    if not os.path.exists(settings.projected_slr_folder):
        os.makedirs(settings.projected_slr_folder)

    fname = "projected_slr_" + scen + "_n" + str(
        settings.nrealizations) + ".nc"
    da.Dataset(projection_data).write_nc(
        os.path.join(settings.projected_slr_folder, fname))
    print("Sea level projection data written to")
    print(settings.projected_slr_folder)
Example No. 18
    def to_dataset(self, compute_elevation=False):
        ds = da.Dataset()
        for v in self._names:
            ds[v] = getattr(self, v.lower())
            # ds[v] = da.DimArray(getattr(self, v.lower()), axes=[self.x], dims=['x'])
        # also add glacier elevation gl, xgl
        if compute_elevation:
            hb, hs, gl, xgl = self.compute_elevation()
            ds['hb'] = hb
            ds['hs'] = hs
            ds['gl'] = gl
            ds['xgl'] = xgl

        # check basal mode
        if self._beta is not None:
            ds['beta'] = self._beta

        ds.set_axis(self.x, name='x', inplace=True)
        return ds
Example No. 19
def cflon(f, altmin, latbounds):

    # altmin is an array

    v = vcm.VCM(f, verbose=False)
    out = da.Dataset()

    lon_axis = da.Axis(lonbins[:-1], 'lon')

    # number of profiles per lon bin
    h, xx = np.histogram(v.lon, bins=lonbins)
    out['nprof'] = da.DimArray(h, [lon_axis])
    out['nprof'].longname = 'Number of measured profiles'

    for n in names:

        cv = v.get_vcm(n)
        assert cv is not None

        # clip latitudes (np.take needs integer indices, not a boolean mask)
        latidx = np.where((v.lat >= latbounds[0]) & (v.lat < latbounds[1]))[0]
        cv = np.take(cv, latidx, axis=0)
        lon = np.take(v.lon, latidx, axis=0)

        outdict = dict()

        for a in altmin:

            idx = np.where(v.altitude >= a)[0]
            cloudy = np.take(cv, idx, axis=1)
            cloudy = np.sum(cloudy, axis=1)
            np.clip(cloudy, 0, 1, out=cloudy)

            h, xx = np.histogram(lon, bins=lonbins, weights=cloudy)
            outdict[a] = da.DimArray(h, [
                lon_axis,
            ])

        outname = n + '_cprof'
        out[outname] = da.stack(outdict, axis='altmin')
        out[outname].longname = 'Number of cloudy profiles from cloud mask = ' + n

    return out
Example No. 20
def get_atl_tcs(file='/Users/peterpfleiderer/Projects/tropical_cyclones/data/Allstorms.ibtracs_all.v03r10.nc'):
    TC=da.read_nc(file)
    # select north atlantic basin
    tc_sel=TC.ix[TC['basin'][:,0]==0]
    # select time period
    tc_sel=tc_sel.ix[tc_sel['season']>=1900,:]
    # select main tracks
    tc_sel=tc_sel.ix[tc_sel['track_type']==0,:]

    tc_lat=tc_sel['lat_for_mapping']
    tc_lon=tc_sel['lon_for_mapping']
    # tc_sel=tc_sel.ix[np.where(tc_sel_cat>0)]
    tmp_time=tc_sel['source_time']
    tc_year,tc_month,tc_yrmn,tc_yrFr=tmp_time.copy(),tmp_time.copy(),tmp_time.copy(),tmp_time.copy()
    for storm in tmp_time.storm:
        for tt in tmp_time.time:
            if np.isfinite(tmp_time[storm,tt]):
                datevar=num2date(tmp_time[storm,tt],units = tmp_time.units)
                tc_year[storm,tt]=datevar.year
                tc_month[storm,tt]=datevar.month
                tc_yrmn[storm,tt]=datevar.year+float(datevar.month-1)/12.
                tc_yrFr[storm,tt]=toYearFraction(datevar)

    # remove extratropical time steps (and disturbances):
    # keep only steps with nature_for_mapping equal to 2 or 3, set the rest to NaN
    tc_wind=tc_sel['source_wind'].ix[:,:,0]
    tc_pres=tc_sel['source_pres'].ix[:,:,0]
    not_tropical=(tc_sel['nature_for_mapping'].values!=2)&(tc_sel['nature_for_mapping'].values!=3)
    tc_wind.values[not_tropical]=np.nan
    tc_pres.values[not_tropical]=np.nan

    ds=da.Dataset({
        'wind':tc_wind,
        'mslp':tc_pres,
        'lat':tc_lat,
        'lon':tc_lon,
        'time':tc_sel['source_time'],
        'year':tc_year,
        'month':tc_month,
        'yrmn':tc_yrmn,
        'yrFr':tc_yrFr,
    })
    return(ds)
Example No. 21
def _load(variables, bbox=None, maxshape=None):
    """
    """
    f = nc.Dataset(get_datafile(NCFILE))

    # reconstruct coordinates
    xmin, ymax = -638000.0, -657600.0
    spacing = 150.0
    nx, ny = 10018, 17946
    x = np.linspace(xmin, xmin + spacing*(nx-1), nx)  # ~ 10000 x 18000 grid points,
    y = np.linspace(ymax, ymax - spacing*(ny-1), ny)  # reversed data

    slice_x, slice_y = get_slices_xy((x,y), bbox, maxshape, inverted_y_axis=True)

    vx = f.variables['vx'][slice_y, slice_x]
    vy = f.variables['vy'][slice_y, slice_x]
    x = x[slice_x]
    y = y[slice_y]

    # convert all to a dataset
    ds = da.Dataset()
    _map_var_names = _MAP_VAR_NAMES.copy()
    for nm in variables:
        ncvar = _map_var_names.pop(nm,nm)
        ds[nm] = da.DimArray(f.variables[ncvar][slice_y, slice_x], axes=[y,x], dims=['y','x'])
        # attributes
        for att in f.variables[ncvar].ncattrs():
            setattr(ds[nm], att.lower(), f.variables[ncvar].getncattr(att))

    # attributes
    for att in f.ncattrs():
        setattr(ds, att.lower(), f.getncattr(att))

    ds.dataset = NCFILE
    ds.description = DESC

    f.close()

    return ds
Example No. 22
    def prepare_data(self,
                     lats,
                     lons,
                     time_,
                     dates,
                     smoothing_factor=1,
                     coarsening_factor=1,
                     land_mask=None,
                     time_steps=None):
        self._time = time_
        if time_steps is None:
            time_steps = range(len(self._time))
        self._time_i = time_steps
        self._dates = dates
        self._yr_frac = np.array([toYearFraction(dd) for dd in self._dates])

        self._coarsening_factor = coarsening_factor
        self._smoothing_factor = smoothing_factor

        # input fields
        self._lats_fine = lats
        self._lons_fine = lons
        self._lats = smoother(coarsener(lats, coarsening_factor),
                              smoothing_factor)
        self._lons = smoother(coarsener(lons, coarsening_factor),
                              smoothing_factor)
        self._lat = self._lats[:, 0]
        self._lon = self._lons[0, :]

        if land_mask is not None:
            self._land_mask = land_mask
        else:
            self._land_mask = np.ones(
                [len(self._time), lats.shape[0], lats.shape[1]])

        info = da.Dataset({'lats': self._lats, 'lons': self._lons})
        info.write_nc(self._working_dir + str(self._identifier) + '_grid.nc',
                      mode='w')
Example No. 23
def zone_vcm_from_vcm_orbit(vcm_orbit, latbins=latbins):
    
    # read data
    #print 'opening ' + vcm_orbit
    v = vcm.VCM(vcm_orbit, verbose=False)
    
    nlat = latbins.shape[0]
    nalt = v.altitude.shape[0]
    
    out = da.Dataset()

    # ilatbins = vector with nprof indexes containing bin numbers
    ilatbins = np.digitize(v.lat, latbins)
    lat_axes = da.Axis(latbins[:-1], 'lat')

    nprof, xx = np.histogram(v.lat, bins=latbins)
    out['nprof'] = da.DimArray(nprof * 3, [lat_axes])

    alt_axes = da.Axis(v.altitude, 'altitude')

    for name in vcm_names:
        
        this_vcm = v.get_vcm(name)
        zone_vcm = np.zeros([nlat-1, nalt], dtype='uint16')
        
        prof_is_cloudy = np.sum(this_vcm, axis=1)
        np.clip(prof_is_cloudy, 0, 3, out=prof_is_cloudy)
        cprof, xx = np.histogram(v.lat, bins=latbins, weights=prof_is_cloudy)
        out[name + '_cprof'] = da.DimArray(cprof, [lat_axes])
        
        for i,ilatbin in enumerate(ilatbins[:-1]):
            if prof_is_cloudy[i] > 0:
                zone_vcm[ilatbin,:] += np.take(this_vcm, i, axis=0)
        out[name] = da.DimArray(zone_vcm, [lat_axes, alt_axes], longname='Number of cloudy points in lat-z bin, considering ' + name)
    
    return out
Example No. 24
for i_run, file_name in enumerate(all_files[1:]):
    print(file_name)
    big_merge['eke'] = da.concatenate(
        (big_merge['eke'], da.read_nc(file_name)['eke'][:, 0:, :]))
    tmp = da.read_nc(file_name)['eke'][:, 0:, :].copy()
    tmp.values = i_run + 1
    big_merge['run_id'] = da.concatenate((big_merge['run_id'], tmp))

for region in [
        'EAS', 'TIB', 'CAS', 'WAS', 'MED', 'CEU', 'ENA', 'CNA', 'WNA', 'NAS',
        'NEU', 'CGI', 'ALA'
]:
    mask = masks[region][0:, :]
    lats = np.where(np.nanmax(mask, axis=1) != 0)[0]
    lons = np.where(np.nanmax(mask, axis=0) != 0)[0]

    da.Dataset({
        key: val.ix[:, lats, lons]
        for key, val in big_merge.items()
    }).write_nc(out_path +
                '_'.join(['EKE', model, scenario, 'bigMerge', region]) + '.nc')

da.Dataset({
    key: val[:, 35:60, :]
    for key, val in big_merge.items()
}).write_nc(out_path + '_'.join(['EKE', model, scenario, 'bigMerge', 'NHml']) +
            '.nc')

# del big_merge
# gc.collect()
Example No. 25
                    '-o',
                    help="overwrite output files",
                    action="store_true")
args = parser.parse_args()

if args.overwrite:
    overwrite = True
else:
    overwrite = False

identifiers = [
    nn.split('_')[-3]
    for nn in glob.glob('../data/WAH/batch_755/region/item16222_6hrly_inst/*')
]

for style in ['contours']:
    if os.path.isfile('detection/ATL/ATL_all_tracks_' + style +
                      '.nc') == False or overwrite:
        # check for duplicates
        all_tracks = {}
        for identifier in identifiers:
            tmp = da.read_nc('detection/ATL/' + str(identifier) +
                             '/track_info_' + style + '.nc')
            for id_, track in tmp.items():
                if id_ not in ['z', 'time']:
                    all_tracks[id_] = track

        all_tracks = da.Dataset({'all_tracks': all_tracks})
        all_tracks.write_nc('detection/ATL/ATL_all_tracks_' + style + '.nc',
                            mode='w')
Example No. 26
                                      '/stats_' + corWith_name + '*' +
                                      scenario + '*_' + state + '.nc')
                tmp_4 = {}
                for file_name in all_files:
                    region = file_name.split('_')[-2]
                    if region != 'NHml':
                        tmp = da.stack(da.read_nc(file_name),
                                       axis='statistic',
                                       align=True)
                        tmp_4[region] = tmp.mean(axis=(-2, -1))
                        tmp_4[region].values = np.nanmean(tmp, axis=(-2, -1))

                tmp_3_ = da.stack(tmp_3, align=True, axis='region')
                tmp_4_ = da.stack(tmp_4, align=True, axis='region')

                tmp_2[corWith_name] = da.concatenate((tmp_3_, tmp_4_),
                                                     align=True,
                                                     axis='statistic')

            tmp_1[state] = da.stack(tmp_2, axis='corWith', align=True)

        tmp_0[model] = da.stack(tmp_1, axis='state', align=True)

    result[scenario] = da.stack(tmp_0, axis='model', align=True)

da.Dataset({
    'summary_cor': da.stack(result, axis='scenario', align=True)
}).write_nc('data/cor_reg_summary.nc')

#
Example No. 27
                        time_ = np.append(
                            time_, tmp_time[indices_of_mon][np.argmax(
                                tmp_pers[indices_of_mon])])

                    # detrend
                    slope, intercept, r_value, p_value, std_err = stats.linregress(
                        time_, pers_)
                    pers = pers_ - (intercept +
                                    slope * time_) + np.nanmean(pers_)
                    slope, intercept, r_value, p_value, std_err = stats.linregress(
                        time_, corWith_)
                    corWith = corWith_ - (intercept +
                                          slope * time_) + np.nanmean(corWith_)

                    for season_name, season_id in seasons.items():
                        cor_longest['corrcoef'][
                            season_name, y, x], cor_longest['p_value'][
                                season_name, y,
                                x] = stats.pearsonr(pers[sea_ == season_id],
                                                    corWith[sea_ == season_id])

    cor['corrcoef'].persistence = pers_file
    cor['corrcoef'].correlated_with = corWith_file
    da.Dataset(cor).write_nc(working_path + scenario + '/cor_' + corWith_name +
                             '_' + '_'.join([model, scenario]) + '_warm.nc')
    cor_longest['corrcoef'].persistence = pers_file
    cor_longest['corrcoef'].correlated_with = corWith_file
    da.Dataset(cor_longest).write_nc(working_path + scenario + '/corLongest_' +
                                     corWith_name + '_' +
                                     '_'.join([model, scenario]) + '_warm.nc')
Example No. 28
                if unique:
                    xxx.append(x_)
                    storms.append(id_)
                    found_tracks[id_] = track
                    if track.shape[0] > longest_track:
                        longest_track = track.shape[0]

    all_tracks = da.DimArray(
        np.zeros([len(found_tracks), longest_track, 13]) * np.nan,
        axes=[list(found_tracks.keys()),
              range(longest_track), tmp_example.z],
        dims=['ID', 'time', 'z'])
    for id_, track in found_tracks.items():
        all_tracks[id_, 0:track.shape[0] - 1, :] = track
    da.Dataset({
        'all_tracks': all_tracks
    }).write_nc('detection/CAM25/CAM25_all_tracks.nc', mode='w')

else:
    all_tracks = da.read_nc(
        'detection/CAM25/CAM25_all_tracks.nc')['all_tracks']

############################
# check for real duplicates
############################
# # check for duplicates
# xxx=[]
# storms=[]
# useful_runs=[]
# not_unique={}
# for identifier in identifiers:
Example No. 29
 ds = da.Dataset({
     'x90_cum_temp':
     da.DimArray(axes=[
         np.asarray(runs),
         np.asarray(range(period_number_limit), np.int32), lat, lon
     ],
                 dims=['run', 'ID', 'lat', 'lon']),
     'x90_mean_temp':
     da.DimArray(axes=[
         np.asarray(runs),
         np.asarray(range(period_number_limit), np.int32), lat, lon
     ],
                 dims=['run', 'ID', 'lat', 'lon']),
     'x90_hottest_day_shift':
     da.DimArray(axes=[
         np.asarray(runs),
         np.asarray(range(period_number_limit), np.int32), lat, lon
     ],
                 dims=['run', 'ID', 'lat', 'lon']),
     'x90_hottest_day':
     da.DimArray(axes=[
         np.asarray(runs),
         np.asarray(range(period_number_limit), np.int32), lat, lon
     ],
                 dims=['run', 'ID', 'lat', 'lon']),
     'original_period_id':
     da.DimArray(axes=[
         np.asarray(runs),
         np.asarray(range(period_number_limit), np.int32), lat, lon
     ],
                 dims=['run', 'ID', 'lat', 'lon']),
     'TXx_in_x90':
     da.DimArray(axes=[np.asarray(runs), example.year, lat, lon],
                 dims=['run', 'year', 'lat', 'lon'])
 })
Example No. 30
                fut[region] = da.stack(da.read_nc(fut_file),
                                       axis='statistic',
                                       align=True)
                fut[region].lat = np.round(fut[region].lat, 2)
                fut[region].lon = np.round(fut[region].lon, 2)
            fut = da.stack(fut, align=True, axis='region')

            tmp_2[corWith_name] = da.stack((hist, fut),
                                           axis='scenario',
                                           keys=['All-Hist', 'Plus20-Future'],
                                           align=True)

        tmp_1[state] = da.stack(tmp_2, axis='corWith', align=True)

    # data = da.stack(tmp_1, axis='state', align=True)
    da.Dataset(tmp_1).write_nc(working_path + '/cor_Summary_' + model +
                               '_gridded.nc')

working_path = '/p/tmp/pepflei/HAPPI/raw_data/reg_stats/'

for model in ['NorESM1', 'CAM4-2degree', 'MIROC5', 'ECHAM6-3-LR']:
    tmp_1 = {}
    for state, style in state_dict.items():
        tmp_2 = {}
        for corWith_name in ['EKE', 'SPI3']:

            hist_files = glob.glob(working_path + model + '/stats_' +
                                   corWith_name + '_' +
                                   '_'.join([model, 'All-Hist', '*', state]) +
                                   '.nc')
            hist = {}
            for hist_file in hist_files: