Example #1
def consolidate_results(data_hpo, data_replicates, rep_types):
    new_data = dict()
    for hpo, hpo_datas in data_replicates.items():
        ideal_datas = data_hpo[hpo]
        hpo_namespaces = sorted(ideal_datas.keys())
        ideal_data = [ideal_datas[namespace] for namespace in hpo_namespaces]
        new_data[hpo] = dict(ideal=xarray.combine_by_coords(ideal_data))
        new_data[hpo]['ideal'].coords['namespace'] = ('seed', hpo_namespaces)

        for replication_type in rep_types:
            hpo_namespaces = sorted(hpo_datas.keys())
            replicates_data = [
                hpo_datas[hpo_namespace][replication_type]
                for hpo_namespace in hpo_namespaces
            ]
            new_data[hpo][replication_type] = xarray.combine_by_coords(
                replicates_data)
            replicate_namespaces = [
                env(hpo_namespace, replication_type)
                for hpo_namespace in hpo_namespaces
            ]
            new_data[hpo][replication_type].coords['namespace'] = (
                'seed', replicate_namespaces)

    return new_data
Example #2
    def test_combine_by_coords_combine_attrs_variables(
        self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception
    ):
        """check that combine_attrs is used on data variables and coords"""
        data1 = Dataset(
            {"x": ("a", [0], attrs1), "y": ("a", [0], attrs1), "a": ("a", [0], attrs1)}
        )
        data2 = Dataset(
            {"x": ("a", [1], attrs2), "y": ("a", [1], attrs2), "a": ("a", [1], attrs2)}
        )

        if expect_exception:
            with pytest.raises(MergeError, match="combine_attrs"):
                combine_by_coords([data1, data2], combine_attrs=combine_attrs)
        else:
            actual = combine_by_coords([data1, data2], combine_attrs=combine_attrs)
            expected = Dataset(
                {
                    "x": ("a", [0, 1], expected_attrs),
                    "y": ("a", [0, 1], expected_attrs),
                    "a": ("a", [0, 1], expected_attrs),
                }
            )

            assert_identical(actual, expected)
Example #3
 def test_check_for_impossible_ordering(self):
     ds0 = Dataset({"x": [0, 1, 5]})
     ds1 = Dataset({"x": [2, 3]})
     with raises_regex(
             ValueError, "does not have monotonic global indexes"
             " along dimension x"):
         combine_by_coords([ds1, ds0])
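The "impossible ordering" tested above comes from interleaved coordinate ranges, not from the order of the input list; a minimal sketch (not part of the test suite) illustrating the distinction:

import xarray as xr

# Non-overlapping ranges combine fine in any input order:
ok = xr.combine_by_coords([xr.Dataset({"x": [2, 3]}), xr.Dataset({"x": [0, 1]})])
print(ok.x.values)  # [0 1 2 3]

# Interleaved ranges cannot form a monotonic global index along x:
try:
    xr.combine_by_coords([xr.Dataset({"x": [2, 3]}), xr.Dataset({"x": [0, 1, 5]})])
except ValueError as err:
    print(err)  # "... does not have monotonic global indexes along dimension x"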
Example #4
    def test_combine_by_coords_no_concat(self):
        objs = [Dataset({'x': 0}), Dataset({'y': 1})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': 0, 'y': 1})
        assert_identical(expected, actual)

        objs = [Dataset({'x': 0, 'y': 1}), Dataset({'y': np.nan, 'z': 2})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': 0, 'y': 1, 'z': 2})
        assert_identical(expected, actual)
Example #5
def test_combine_by_coords_raises_for_differing_types():

    # str and bytes cannot be compared
    da_1 = DataArray([0], dims=["time"], coords=[["a"]], name="a").to_dataset()
    da_2 = DataArray([1], dims=["time"], coords=[[b"b"]], name="a").to_dataset()

    with pytest.raises(
        TypeError, match=r"Cannot combine along dimension 'time' with mixed types."
    ):
        combine_by_coords([da_1, da_2])
Example #6
 def test_combine_coords_mixed_datasets_arrays(self):
     objs = [
         DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})),
         Dataset({"x": [2, 3]}),
     ]
     with pytest.raises(
         ValueError,
         match=r"Can't automatically combine datasets with unnamed arrays.",
     ):
         combine_by_coords(objs)
Example #7
    def test_combine_by_coords_no_concat(self):
        objs = [Dataset({"x": 0}), Dataset({"y": 1})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": 0, "y": 1})
        assert_identical(expected, actual)

        objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": 0, "y": 1, "z": 2})
        assert_identical(expected, actual)
Example #8
 def test_combine_by_coords_still_fails(self):
     # concat can't handle new variables (yet):
     # https://github.com/pydata/xarray/issues/508
     datasets = [
         Dataset({"x": 0}, {"y": 0}),
         Dataset({"x": 1}, {
             "y": 1,
             "z": 1
         })
     ]
     with pytest.raises(ValueError):
         combine_by_coords(datasets, "y")
Example #9
def _compute_threshold_grid(percentile, yearrange_ref, input_dir, gh_model, cl_model,
                            scenario, soc, fn_str_var, bbox, yearchunks,
                            mask_threshold=None, keep_dis_data=False):
    """given model run and year range specification, this function
    returns the x-th percentile for every pixel over a given
    time horizon (based on daily data) [all-year round percentiles!],
    as well as the mean at each grid cell.

    Parameters:
        cf. parameters in LowFlow.set_from_nc()

    Optional parameters:
        mask_threshold (tuple or list): threshold(s) below which the
            grid is masked out, e.g. ('mean', 1.)

    Returns:
        p_grid (xarray): grid with dis of given percentile (1-timestep)
        mean_grid (xarray): grid with mean(dis)
        """
    LOGGER.info('Computing threshold value per grid cell for Q%i, %i-%i',
                percentile, yearrange_ref[0], yearrange_ref[1])
    if isinstance(mask_threshold, tuple):
        mask_threshold = [mask_threshold]
    bbox = _split_bbox(bbox)
    p_grid = []
    mean_grid = []
    # loop over coordinate bounding boxes to save memory:
    for box in bbox:
        dis_xarray = _read_and_combine_nc(yearrange_ref, input_dir, gh_model, cl_model,
                                    scenario, soc, fn_str_var, box, yearchunks)
        if dis_xarray.dis.data.size: # only if data is not empty
            p_grid += [_xarray_reduce(dis_xarray, fun='p', percentile=percentile)]
            # only compute mean_grid if required by user or mask_threshold:
            if keep_dis_data or (mask_threshold and True in ['mean' in x for x in mask_threshold]):
                mean_grid += [_xarray_reduce(dis_xarray, fun='mean')]

    del dis_xarray
    p_grid = xr.combine_by_coords(p_grid)
    if mean_grid:
        mean_grid = xr.combine_by_coords(mean_grid)

    if isinstance(mask_threshold, list):
        for crit in mask_threshold:
            if 'mean' in crit[0]:
                p_grid.dis.values[mean_grid.dis.values < crit[1]] = 0
                mean_grid.dis.values[mean_grid.dis.values < crit[1]] = 0
            if 'percentile' in crit[0]:
                p_grid.dis.values[p_grid.dis.values < crit[1]] = 0
                mean_grid.dis.values[p_grid.dis.values < crit[1]] = 0
    if keep_dis_data:
        return p_grid, mean_grid
    return p_grid, None
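The helpers _read_and_combine_nc and _xarray_reduce are defined elsewhere in the module; a minimal sketch of the per-pixel reduction that _xarray_reduce is expected to perform, assuming a Dataset with a dis variable and a time dimension (names taken from the function above, the helper itself is hypothetical):

import xarray as xr

def _xarray_reduce_sketch(dis_xarray: xr.Dataset, fun: str = 'mean',
                          percentile: int = 5) -> xr.Dataset:
    """Reduce daily discharge per grid cell (hypothetical stand-in for _xarray_reduce)."""
    if fun == 'p':
        # all-year-round x-th percentile of daily values, per pixel
        return dis_xarray.quantile(percentile / 100, dim='time')
    return dis_xarray.mean(dim='time')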
Example #10
    def test_combine_by_coords_incomplete_hypercube(self):
        # test that this succeeds with default fill_value
        x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]})
        x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]})
        x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]})
        actual = combine_by_coords([x1, x2, x3])
        expected = Dataset(
            {"a": (("y", "x"), [[1, 1], [1, np.nan]])},
            coords={"y": [0, 1], "x": [0, 1]},
        )
        assert_identical(expected, actual)

        # test that this fails if fill_value is None
        with pytest.raises(ValueError):
            combine_by_coords([x1, x2, x3], fill_value=None)
Example #11
    def test_combine_by_coords_all_unnamed_dataarrays(self):
        unnamed_array = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")

        actual = combine_by_coords([unnamed_array])
        expected = unnamed_array
        assert_identical(expected, actual)

        unnamed_array1 = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
        unnamed_array2 = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

        actual = combine_by_coords([unnamed_array1, unnamed_array2])
        expected = DataArray(
            data=[1.0, 2.0, 3.0, 4.0], coords={"x": [0, 1, 2, 3]}, dims="x"
        )
        assert_identical(expected, actual)
Example #12
def fetch_results(client, namespace, configs, medians, params, defaults):

    variables = list(sorted(configs.keys()))

    metrics = fetch_all_metrics(client, namespace, variables)

    epoch = defaults.get('epoch', 1)

    arrays = []
    trial_stats = fetch_vars_stats(client, namespace)
    if remaining(trial_stats):
        raise RuntimeError('Not all trials are completed')
    for variable in variables:
        trials = create_trials(configs[variable], params, metrics)
        variables_except_reference = [v for v in variables if v != 'reference']
        arrays.append(
            create_valid_curves_xarray(trials, metrics,
                                       variables_except_reference, epoch,
                                       list(sorted(params.keys())), variable))

    data = xarray.combine_by_coords(arrays)
    data.attrs['medians'] = medians
    data.coords['namespaces'] = (('seed', ), [
        env(namespace, v) for v in sorted(configs.keys())
    ])

    return data
Example #13
    def test_combine_by_coords_mixed_unnamed_dataarrays(self):
        named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
        unnamed_da = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

        with pytest.raises(
            ValueError, match="Can't automatically combine unnamed DataArrays with"
        ):
            combine_by_coords([named_da, unnamed_da])

        da = DataArray([0, 1], dims="x", coords=({"x": [0, 1]}))
        ds = Dataset({"x": [2, 3]})
        with pytest.raises(
            ValueError,
            match="Can't automatically combine unnamed DataArrays with",
        ):
            combine_by_coords([da, ds])
Example #14
 def _record(self):
     """Method to get data from ANNarchy.Monitor instances,
        and merge and store them to the _data buffer of xarray.DataArray type."""
     for monitor, population in self.monitors.items():
         data = monitor.get()
         variables = list(data.keys())
         data = np.array(list(data.values()))
         if data.size > 0:
             data = data.transpose((1, 0, 2))
             data = DataArray(data,
                              dims=["Time", "Variable", "Neuron"],
                              coords={
                                  "Time":
                                  self._compute_times(
                                      monitor.times(), data.shape[0]),
                                  "Variable":
                                  variables,
                                  "Neuron":
                                  self._get_senders(population,
                                                    population.ranks)
                              },
                              name=self.label)
             if self._data.size:
                 self._data = combine_by_coords([self._data, data],
                                                fill_value=np.nan)
             else:
                 self._data = data
Example #15
def open_and_combine_lat_lon_data(folder, tiles=None):
    """
    Load lat lon data stored as 10x10 degree tiles in folder
    If tiles is none, load all data available
    If no file is available, return None
    """
    fs = GCSFileSystem(cache_timeout=0)
    if not tiles:
        tiles = [
            os.path.splitext(os.path.split(path)[-1])[0]
            for path in fs.ls(folder) if not path.endswith('/')
        ]

    uris = [f'{folder}{tile}.zarr' for tile in tiles]
    ds_list = []
    for uri in uris:
        if fs.exists(uri):
            da = open_zarr_file(uri)
            if da.lat[0] > da.lat[-1]:
                da = da.reindex(lat=da.lat[::-1])
            if da.lon[0] > da.lon[-1]:
                da = da.reindex(lon=da.lon[::-1])
            ds_list.append(da)

    if len(ds_list) > 0:
        ds = xr.combine_by_coords(
            ds_list, combine_attrs="drop_conflicts"
        ).chunk({'lat': 2000, 'lon': 2000})
        return ds
    # print(f'No data available at {folder} for tiles {tiles}')
    return None
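The reindex calls above make each tile's lat/lon ascending so that combine_by_coords sees monotonic coordinates; a more compact equivalent (a sketch, assuming the same lat/lon coordinate names) is to sort each tile explicitly:

import xarray as xr

def _ensure_ascending(tile: xr.Dataset) -> xr.Dataset:
    # combine_by_coords needs monotonic indexes; sortby makes both axes ascending
    return tile.sortby(['lat', 'lon'])

# ds_list = [_ensure_ascending(open_zarr_file(uri)) for uri in uris if fs.exists(uri)]
# ds = xr.combine_by_coords(ds_list, combine_attrs="drop_conflicts")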
Example #16
def try_to_open_grib_file(path: str) -> xr.Dataset:
    """Try a few different ways to open up a grib file.

    Parameters
    ----------
    path : str
        Path pointing to location of grib file

    Returns
    -------
    ds : xr.Dataset
        The xarray Dataset that contains information
        from the grib file.
    """
    try:
        ds = xr.open_dataset(path, engine="cfgrib")
    except Exception as e:
        try:
            import cfgrib

            ds = cfgrib.open_datasets(path)
            ds = xr.combine_by_coords(ds)
        except Exception:
            logger.error(f"Oh no! There was a problem opening up {path}: {e}")
            return None
    return ds
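A usage sketch for the helper above (the path is hypothetical); cfgrib.open_datasets may split one GRIB file into several datasets, which is why the fallback recombines them with combine_by_coords:

ds = try_to_open_grib_file("data/gfs_analysis.grb2")  # hypothetical path
if ds is not None:
    print(list(ds.data_vars))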
Example #17
def test_combine_by_coords_raises_for_differing_calendars():
    # previously failed with uninformative StopIteration instead of TypeError
    # https://github.com/pydata/xarray/issues/4495

    import cftime

    time_1 = [cftime.DatetimeGregorian(2000, 1, 1)]
    time_2 = [cftime.DatetimeProlepticGregorian(2001, 1, 1)]

    da_1 = DataArray([0], dims=["time"], coords=[time_1],
                     name="a").to_dataset()
    da_2 = DataArray([1], dims=["time"], coords=[time_2],
                     name="a").to_dataset()

    with raises_regex(TypeError, r"cannot compare .* \(different calendars\)"):
        combine_by_coords([da_1, da_2])
Example #18
def merge_datacubes(ds_merge):
    '''
    Merges datacubes by coordinates

    Parameters:
        ds_merge (xArray Dataset[]): Array of datasets to be merged

    Returns:
        ds1 (xArray Dataset): A single datacube with all merged datacubes
    '''

    start = datetime.now()
    if len(ds_merge) == 0:
        print("Error: No datacubes to merge")
        return
    if len(ds_merge) == 1:
        return ds_merge[0]
    else:
        print('Start merging')
        ds1 = ds_merge[0]
        count = 1
        while count < len(ds_merge):
            start1 = datetime.now()
            ds1 = xr.combine_by_coords([ds1, ds_merge[count]])
            count += 1
            diff = datetime.now() - start1
            print("Succesfully merged cube nr " + str(count) +
                  " to the base cube in " + str(diff.seconds) + 's')
        diff = datetime.now() - start
        print('All cubes merged in ' + str(diff.seconds) + 's')
        return ds1
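Pairwise merging in a loop works, but combine_by_coords also accepts the whole list in one call; a minimal one-shot equivalent, under the same assumptions about ds_merge:

import xarray as xr

def merge_datacubes_oneshot(ds_merge):
    """Merge all datacubes in a single combine_by_coords call (sketch)."""
    if not ds_merge:
        print("Error: No datacubes to merge")
        return None
    if len(ds_merge) == 1:
        return ds_merge[0]
    return xr.combine_by_coords(ds_merge)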
Example #19
    def _setup(self):
        """ 
        1. Concatenate a "modern" time-series, for 1950-2100
        
        """

        print("Setting up the analysis...")

        # Modern time-series
        self.modern = xr.combine_by_coords([
            self.hist.to_dataset(name=self.variable_id),
            self.fut.to_dataset(name=self.variable_id)
        ])[self.variable_id]

        # Sub-select data for the "modern" timeseries for 1950-2100
        # TODO: user-configuration for this parameter
        self.modern = self.modern.sel(year=slice(1950, 2100))

        # Center the pi data around its mean
        self.pi = self.pi - self.pi.mean('year')

        # Fit anomaly calculator and then compute anomalies
        self.baseline_anomalizer = BaselineAnomalizer('year', (1980, 2010))
        self.baseline_anomalizer.fit(self.modern)
        self.modern_anom = self.baseline_anomalizer.transform(self.modern)

        # Global averages
        x = self.pi.isel(year=0)
        _area = area_grid(x['lon'].data, x['lat'].data, asarray=False)
        # We eagerly did the area grid calculation in memory, so let's
        # turn it into a dask array now (inside a DataArray)
        _area = _area.chunk()
        self.area = _area
        self.modern_anom_gavg = global_avg(self.modern_anom, weights=self.area)
Example #20
def batch_load(obj, factor=2):
    """
    Load xarray object values by calling compute on block subsets (that are an integral multiple of chunks along each chunked dimension)

    Parameters
    ----------
    obj: xarray object
    factor: int
        multiple of chunksize to load at a single time.
        Passed on to split_blocks
    """
    if isinstance(obj, xr.DataArray):
        dataset = obj._to_temp_dataset()
    else:
        dataset = obj

    # result = xr.full_like(obj, np.nan).load()
    computed = []
    for label, chunk in split_blocks(dataset, factor=factor):
        print(f"computing {label}")
        computed.append(chunk.compute())
    result = xr.combine_by_coords(computed)

    if isinstance(obj, xr.DataArray):
        result = obj._from_temp_dataset(result)

    return result
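split_blocks is assumed to be defined in the same module; a usage sketch of batch_load on a chunked (dask-backed) dataset with dimension coordinates, so that combine_by_coords can reassemble the computed blocks:

import numpy as np
import xarray as xr

# hypothetical chunked dataset with dimension coordinates on x and y
ds = xr.Dataset(
    {"t": (("x", "y"), np.random.rand(100, 100))},
    coords={"x": np.arange(100), "y": np.arange(100)},
).chunk({"x": 10, "y": 10})

loaded = batch_load(ds, factor=2)  # computes 2x2-chunk blocks one at a time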
Example #21
    def combine(self, cleanup=False):
        """Create volume data (excluding surface data) by combining
        lat/lon coordinates across all datasets. Tested for data on a
        regular grid.

        Notes:
        - This has a _very_ large memory overhead, i.e., need enough
          memory to store and manipulate all of the tower data
          simultaneously, otherwise it may hang.
        - xarray.combine_by_coords fails with a cryptic "the supplied
          objects do not form a hypercube" message if the lat/lon values
          do not form a regular grid
        """
        datalist = [data for key, data in self.data.items()]
        self.ds = xr.combine_by_coords(datalist)
        if cleanup is True:
            import gc  # garbage collector
            try:
                del self.data
            except AttributeError:
                pass
            else:
                if self.verbose:
                    print('Cleared data dict from memory')
            finally:
                gc.collect()
        return self.ds
Example #22
    def test_combine_by_coords_all_dataarrays_with_the_same_name(self):
        named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
        named_da2 = DataArray(name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

        actual = combine_by_coords([named_da1, named_da2])
        expected = merge([named_da1, named_da2])
        assert_identical(expected, actual)
Example #23
    def test_combine_by_coords(self):
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        actual = combine_by_coords([actual])
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure combine_by_coords handles non-sorted variables
        objs = [
            Dataset({
                "x": ("a", [0]),
                "y": ("a", [0]),
                "a": [0]
            }),
            Dataset({
                "x": ("a", [1]),
                "y": ("a", [1]),
                "a": [1]
            }),
        ]
        actual = combine_by_coords(objs)
        expected = Dataset({
            "x": ("a", [0, 1]),
            "y": ("a", [0, 1]),
            "a": [0, 1]
        })
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1], "y": [0, 1]})
        assert_equal(actual, expected)

        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
        with pytest.raises(ValueError,
                           match=r"Could not find any dimension coordinates"):
            combine_by_coords(objs)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
        with pytest.raises(ValueError,
                           match=r"Every dimension needs a coordinate"):
            combine_by_coords(objs)

    def test_empty_input(self):
        assert_identical(Dataset(), combine_by_coords([]))
Example #24
def get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points,
               lat_points, lon_points):
    logger.debug(
        'obtaining GFS 0.50 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))
    base_url = 'https://www.ncei.noaa.gov/thredds/model-gfs-g4-anl-files-old/'
    CheckConnection.set_url('ncei.noaa.gov')

    x_arr_list = []
    start_date = datetime(date_lo.year, date_lo.month,
                          date_lo.day) - timedelta(days=1)
    for day in range((date_hi - start_date).days + 1):
        dt = datetime(start_date.year, start_date.month,
                      start_date.day) + timedelta(days=day)
        catalog = TDSCatalog(
            '%s%s%.2d/%s%.2d%.2d/catalog.xml' %
            (base_url, dt.year, dt.month, dt.year, dt.month, dt.day))
        for hour in [3, 6]:
            for cycle in [0, 6, 12, 18]:
                attempts = 0
                while True:
                    try:
                        attempts += 1
                        name = 'gfsanl_4_%s%.2d%.2d_%.2d00_00%s.grb2' % (
                            dt.year, dt.month, dt.day, cycle, hour)
                        if name in list(catalog.datasets):
                            ds_subset = catalog.datasets[name].subset()
                            query = ds_subset.query().lonlat_box(
                                north=lat_hi,
                                south=lat_lo,
                                east=lon_hi,
                                west=lon_lo).variables(*GFS_50_VAR_LIST)
                            CheckConnection.is_online()
                            data = ds_subset.get_data(query)
                            x_arr = xr.open_dataset(NetCDF4DataStore(data))
                            if 'time1' in list(x_arr.coords):
                                x_arr = x_arr.rename({'time1': 'time'})
                            x_arr_list.append(x_arr)
                        else:
                            logger.warning('dataset %s is not found' % name)
                        break
                    except Exception as e:
                        logger.error(traceback.format_exc())
                        CheckConnection.is_online()
                        logger.error(e)
                        logger.error(
                            'Filename %s - Failed connecting to GFS Server - number of attempts: %d'
                            % (name, attempts))
                        time.sleep(2)

    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    lon_points = ((lon_points + 180) % 360) + 180
    res = dataset.interp(lon=lon_points, lat=lat_points,
                         time=time_points).to_dataframe()[GFS_50_VAR_LIST]
    res[[
        'Wind_speed_gust_surface', 'Dewpoint_temperature_height_above_ground'
    ]] = [[np.nan, np.nan]] * len(res)
    return res
Example #25
def forward_propagation(filenames, total_data, nscans, min_peak, nlevel,
                        avg_area, cgridx, cgridy):

    if filenames[-1] == filenames[nscans[-1]]:

        return total_data, 0, cgridx, cgridy, nscans
    else:

        future_cgridy = []
        future_cgridx = []
        peak_ref = []
        cgridxf = cgridx[-1]
        cgridyf = cgridy[-1]
        for future_i in np.arange(len(filenames) - nscans[-1]):
            data = xr.open_dataset(filenames[nscans[-1] + future_i])
            peak = np.nanmax(
                data['reflectivity'].values[0, nlevel, cgridyf -
                                            avg_area:cgridyf + avg_area,
                                            cgridxf - avg_area:cgridxf +
                                            avg_area])
            if peak < min_peak or math.isnan(peak):
                future_scan = future_i
                break
            peak_ref.append(peak)
            future_scan = future_i + 1
            future_cgridy.append(
                np.where(data['reflectivity'].values[0, nlevel, :, :] ==
                         peak_ref[future_i])[0][0])
            future_cgridx.append(
                np.where(data['reflectivity'].values[0, nlevel, :, :] ==
                         peak_ref[future_i])[1][0])

            # update center
            cgridxf = future_cgridx[future_i]
            cgridyf = future_cgridy[future_i]
            del data

        fw_nscans = np.zeros(len(nscans) + future_scan)
        fw_cgridy, fw_cgridx = np.zeros(len(nscans) + future_scan), np.zeros(
            len(nscans) + future_scan)
        for idx in np.arange(future_scan):
            fw_nscans[len(nscans) + idx] = nscans[-1] + idx + 1
            fw_cgridy[len(nscans) + idx] = future_cgridy[idx]
            fw_cgridx[len(nscans) + idx] = future_cgridx[idx]
        fw_nscans[:len(nscans)] = nscans
        fw_cgridy[:len(nscans)] = cgridy
        fw_cgridx[:len(nscans)] = cgridx
        fw_nscans = fw_nscans.astype(int)
        fw_cgridx = fw_cgridx.astype(int)
        fw_cgridy = fw_cgridy.astype(int)

        if future_cgridx:
            for i in np.arange(1, future_scan):
                if len(filenames) >= nscans[-1] + i:
                    data = xr.open_dataset(filenames[nscans[-1] + i])
                    total_data = xr.combine_by_coords([total_data, data])
                    del data

        return total_data, future_i, fw_cgridx, fw_cgridy, fw_nscans
Example #26
def open_fastoutput(datapath="BOUT.fast.*.nc"):
    """
    Opens fast output data and combines into a single dataset.

    """

    # Get list of all files
    filepaths, filetype = _expand_filepaths(datapath)

    # Iterate over all files, extracting DataArrays ready for combining
    fo_data = []
    for i, filepath in enumerate(filepaths):

        fo = xr.open_dataset(filepath)

        if i == 0:
            # Get time coordinate from first file
            time = fo["time"]

        # Time is global, and we already extracted it
        fo = fo.drop_vars("time", errors="ignore")

        # There might be no virtual probe in this region
        if len(fo.data_vars) > 0:

            for name, da in fo.items():

                # Save the physical position (in index units)
                da = da.expand_dims(x=1, y=1, z=1)
                da = da.assign_coords(
                    x=xr.DataArray([da.attrs["ix"]], dims=["x"]),
                    y=xr.DataArray([da.attrs["iy"]], dims=["y"]),
                    z=xr.DataArray([da.attrs["iz"]], dims=["z"]),
                )

                # Re-attach the time coordinate
                da = da.assign_coords(time=time)

                # We saved the position, so we don't care what number the variable was;
                # only need its name (i.e. n, T, etc.)
                regex = re.compile(r"(\D+)([0-9]+)")
                match = regex.match(name)
                if match is None:
                    raise ValueError(
                        f"Regex could not parse the variable named {name}")
                var, num = match.groups()
                da.name = var

                # Must promote DataArrays to Datasets until we require xarray-0.19.0
                # where xarray GH #3248 is fixed
                ds = xr.Dataset({var: da})
                fo_data.append(ds)

        fo.close()

    # This will merge different variables, and arrange by physical position
    full_fo = xr.combine_by_coords(fo_data, combine_attrs="drop_conflicts")

    return full_fo
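As the comment above notes, wrapping each DataArray in a Dataset is only needed for older xarray; from xarray 0.19 onward, named DataArrays can be passed to combine_by_coords directly (cf. the unit tests earlier on this page), a sketch:

import xarray as xr

a = xr.DataArray([1.0, 2.0], dims="x", coords={"x": [0, 1]}, name="n")
b = xr.DataArray([3.0, 4.0], dims="x", coords={"x": [2, 3]}, name="n")
combined = xr.combine_by_coords([a, b])  # no per-variable Dataset wrapping needed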
Example #27
 def test_combine_coords_mixed_datasets_named_dataarrays(self):
     da = DataArray(name="a", data=[4, 5], dims="x", coords=({"x": [0, 1]}))
     ds = Dataset({"b": ("x", [2, 3])})
     actual = combine_by_coords([da, ds])
     expected = Dataset(
         {"a": ("x", [4, 5]), "b": ("x", [2, 3])}, coords={"x": ("x", [0, 1])}
     )
     assert_identical(expected, actual)
Example #28
    def test_combine_by_coords(self):
        objs = [Dataset({'x': [0]}), Dataset({'x': [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        actual = combine_by_coords([actual])
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure combine_by_coords handles non-sorted variables
        objs = [
            Dataset({
                'x': ('a', [0]),
                'y': ('a', [0]),
                'a': [0]
            }),
            Dataset({
                'x': ('a', [1]),
                'y': ('a', [1]),
                'a': [1]
            })
        ]
        actual = combine_by_coords(objs)
        expected = Dataset({
            'x': ('a', [0, 1]),
            'y': ('a', [0, 1]),
            'a': [0, 1]
        })
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1], 'y': [0, 1]})
        assert_equal(actual, expected)

        objs = [Dataset({'x': 0}), Dataset({'x': 1})]
        with raises_regex(ValueError, 'Could not find any dimension '
                          'coordinates'):
            combine_by_coords(objs)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})]
        with raises_regex(ValueError, 'Every dimension needs a coordinate'):
            combine_by_coords(objs)

    def test_empty_input(self):
        assert_identical(Dataset(), combine_by_coords([]))
Example #29
    def Generate_HIST_Covariates(self):
        '''
        Load, fix, and resample (hourly) all historical covariates:
            AWTs, DWTs, MJO, MMSL, AT
        '''

        # load data
        AWT = self.Load_SST_KMA()  # bmus + 1
        MSL = self.Load_TIDE_hist_mmsl() # mmsl (mm)
        MJO = self.Load_MJO_hist()
        DWT = self.Load_ESTELA_KMA()  # bmus + 1
        ATD_h = self.Load_TIDE_hist_astro()

        # fix WTs id format
        AWT = xr.Dataset({'bmus': AWT.bmus + 1}, coords = {'time': AWT.time})
        DWT = xr.Dataset({'bmus': (('time',), DWT.sorted_bmus_storms + 1)},
                         coords = {'time': DWT.time.values[:]})

        # get MJO categories 
        mjo_cs, _ = MJO_Categories(MJO['rmm1'], MJO['rmm2'], MJO['phase'])
        MJO['bmus'] = (('time',), mjo_cs)

        # reindex data to hourly (pad)
        AWT_h = fast_reindex_hourly(AWT)
        MSL_h = MSL.resample(time='1h').pad()
        MJO_h = fast_reindex_hourly(MJO)
        DWT_h = fast_reindex_hourly(DWT)

        # generate time envelope for output 
        d1, d2 = xds_further_dates(
            [AWT_h, ATD_h, MSL_h, MJO_h, DWT_h, ATD_h]
        )
        ten = pd.date_range(d1, d2, freq='H')

        # generate empty output dataset 
        OUT_h = xr.Dataset(coords={'time': ten})

        # prepare data
        AWT_h = AWT_h.rename({'bmus':'AWT'})
        MJO_h = MJO_h.drop_vars(['mjo','rmm1','rmm2','phase']).rename({'bmus':'MJO'})
        MSL_h = MSL_h.drop_vars(['mmsl_median']).rename({'mmsl':'MMSL'})
        MSL_h['MMSL'] = MSL_h['MMSL'] / 1000.0  # mm to m
        DWT_h = DWT_h.rename({'bmus':'DWT'})

        # TODO: review this
        ATD_h = ATD_h.drop_vars(['WaterLevels','Residual']).rename({'Predicted': 'AT'})
        #ATD_h = ATD_h.drop_vars(['observed','ntr','sigma']).rename({'predicted':'AT'})

        # combine data
        xds = xr.combine_by_coords(
            [OUT_h, AWT_h, MJO_h, MSL_h, DWT_h, ATD_h],
            fill_value = np.nan,
        )

        # repair times: round to hour and remove duplicates (if any)
        xds = repair_times_hourly(xds)

        return xds
Example #30
    def test_combine_by_coords_all_named_dataarrays(self):
        named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")

        actual = combine_by_coords([named_da])
        expected = named_da.to_dataset()
        assert_identical(expected, actual)

        named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
        named_da2 = DataArray(name="b", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

        actual = combine_by_coords([named_da1, named_da2])
        expected = Dataset(
            {
                "a": DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"),
                "b": DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x"),
            }
        )
        assert_identical(expected, actual)