def test_resample_for_gsee_with_pdfs():
    """Run ``pre.resample_for_gsee`` with PDFs on annual and monthly input.

    For each frequency a single-point input dataset and a random PDF dataset
    are built and ``pre.resample_for_gsee`` is called once.  The entry found
    at index ``i`` of the shared list afterwards is checked for its structure
    and for the expected sum and mean of its ``pv`` variable.
    """
    # Fix the global NumPy seed so the random PDF data below is reproducible.
    np.random.seed(222)
    # freq -> (expected sum of 'pv', expected nan-mean of 'pv')
    expected_results = {
        "AS": (2642.10882, 2642.108819),
        "MS": (148373.038062, 6451.001654),
    }
    coords = (45, 8.5)
    for freq in ["AS", "MS"]:
        data_l = 2 if freq == "AS" else 24
        i = data_l // 2
        ds = xr.Dataset(
            data_vars={
                "global_horizontal": (("time"), np.linspace(100, 800, data_l))
            },
            coords={
                "time": pd.date_range(start="2000-01-01", periods=data_l, freq=freq),
                "lat": [coords[0]],
                "lon": [coords[1]],
            },
        ).sel(lat=coords[0], lon=coords[1])
        # NOTE: "xk" must be drawn before "pk" to keep the seeded random
        # stream (and with it the expected values) unchanged.
        ds_pdfs = xr.Dataset(
            data_vars={
                "xk": (("bins", "month"), 10000 * np.random.rand(128, 12) / 2),
                "pk": (("bins", "month"), np.random.rand(128, 12)),
            },
            coords={
                "bins": range(0, 128),
                "month": range(1, 13),
                "lat": [coords[0]],
                "lon": [coords[1]],
            },
        ).sel(lat=coords[0], lon=coords[1])
        params = {
            "tilt": 35,
            "azim": 180,
            "tracking": 0,
            "capacity": 1000,
            "use_inverter": False,
        }
        # The context manager shuts the manager's server process down at the
        # end of every iteration instead of leaking one process per frequency.
        with multiprocessing.Manager() as manager:
            shr_mem = manager.list([None] * data_l)
            prog_mem = manager.list([data_l])
            pre.resample_for_gsee(
                ds, freq[0], params, i, coords, shr_mem, prog_mem, ds_pdfs
            )
            # Fetch a local copy while the manager is still alive.
            piece = shr_mem[i]
        shr_obj = piece.resample(time=freq).pad()
        assert isinstance(shr_obj, xr.Dataset)
        assert len(shr_obj.data_vars) == 1
        assert "pv" in shr_obj.data_vars
        assert shr_obj.sizes["time"] == len(ds["global_horizontal"])
        assert shr_obj.sizes["lat"] == 1
        assert shr_obj.sizes["lon"] == 1
        assert np.array_equal(ds["time"].values, shr_obj["time"].values)
        assert shr_obj["pv"].sum() == pytest.approx(expected_results[freq][0], abs=1e-5)
        assert np.nanmean(shr_obj["pv"].values) == pytest.approx(
            expected_results[freq][1], abs=1e-5
        )
def test_resample_for_gsee():
    """Run ``pre.resample_for_gsee`` without PDFs on annual, daily and hourly input.

    For each frequency a single-point input dataset with 48 time steps is
    built and ``pre.resample_for_gsee`` is called once.  The entry found at
    index ``i`` of the shared list afterwards is checked for its structure and
    for the expected sum and mean of its ``pv`` variable.
    """
    data_l = 48
    # Input series for the annual and daily cases.
    linear_data = np.linspace(100, 800, data_l)
    # Input series for the hourly case: 48 hand-written values.
    hourly_data = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.9, 87.9, 231.2, 385.6, 478.1,
        507.1, 580.3, 630.3, 508.5, 316.1, 208.1, 80.9, 3.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.3, 72.9, 121.3,
        164.3, 358.5, 854.5, 904.0, 938.0, 917.0, 844.3, 551.8, 519.8,
        454.3, 205.8, 70.0, 4.4, 0.0, 0.0, 0.0
    ]
    # freq -> (expected sum of 'pv', expected nan-mean of 'pv')
    expected_results = {
        'AS': (366112.891239, 7789.635983),
        'D': (384515.728381, 8010.744341),
        'H': (15306.762448, 318.8908843)
    }
    coords = (45, 8.5)
    i = 12
    for freq in ['AS', 'D', 'H']:
        # Choose the series per frequency rather than overwriting a shared
        # variable, so the test does not depend on 'H' being iterated last.
        data = hourly_data if freq == 'H' else linear_data
        time_index = pd.date_range(start='2000-01-01',
                                   periods=data_l,
                                   freq=freq)
        ds = xr.Dataset(data_vars={'global_horizontal': (('time'), data)},
                        coords={
                            'time': time_index,
                            'lat': [coords[0]],
                            'lon': [coords[1]]
                        })
        ds = ds.sel(lat=coords[0], lon=coords[1])
        params = {
            'tilt': 35,
            'azim': 180,
            'tracking': 0,
            'capacity': 1000,
        }
        # The context manager shuts the manager's server process down at the
        # end of every iteration instead of leaking one process per frequency.
        with multiprocessing.Manager() as manager:
            shr_mem = manager.list([None] * data_l)
            prog_mem = manager.list([data_l])

            pre.resample_for_gsee(ds, freq[0], params, i, coords, shr_mem,
                                  prog_mem)

            # Fetch a local copy while the manager is still alive.
            piece = shr_mem[i]

        shr_obj = piece.resample(time=freq).pad()
        assert isinstance(shr_obj, xr.Dataset)
        assert len(shr_obj.data_vars) == 1
        assert 'pv' in shr_obj.data_vars
        assert shr_obj.sizes['time'] == len(ds['global_horizontal'])
        assert shr_obj.sizes['lat'] == 1
        assert shr_obj.sizes['lon'] == 1
        assert np.array_equal(ds['time'].values, shr_obj['time'].values)
        assert shr_obj['pv'].sum() == pytest.approx(expected_results[freq][0],
                                                    abs=1e-5)
        assert np.nanmean(shr_obj['pv'].values) == pytest.approx(
            expected_results[freq][1], abs=1e-5)
def test_resample_for_gsee_with_pdfs():
    """Run ``pre.resample_for_gsee`` with PDFs on annual and monthly input.

    For each frequency a single-point input dataset and a random PDF dataset
    are built and ``pre.resample_for_gsee`` is called once.  The entry found
    at index ``i`` of the shared list afterwards is checked for its structure
    and for the expected sum and mean of its ``pv`` variable.
    """
    # Fix the global NumPy seed so the random PDF data below is reproducible.
    np.random.seed(222)
    # freq -> (expected sum of 'pv', expected nan-mean of 'pv')
    expected_results = {
        'AS': (2642.10882, 2642.108819),
        'MS': (148373.038062, 6451.001654)
    }
    coords = (45, 8.5)
    for freq in ['AS', 'MS']:
        data_l = 2 if freq == 'AS' else 24
        i = data_l // 2
        time_index = pd.date_range(start='2000-01-01',
                                   periods=data_l,
                                   freq=freq)
        ds = xr.Dataset(
            data_vars={
                'global_horizontal': (('time'), np.linspace(100, 800, data_l))
            },
            coords={
                'time': time_index,
                'lat': [coords[0]],
                'lon': [coords[1]]
            })
        ds = ds.sel(lat=coords[0], lon=coords[1])
        # NOTE: 'xk' must be drawn before 'pk' to keep the seeded random
        # stream (and with it the expected values) unchanged.
        xk = 10000 * np.random.rand(128, 12) / 2
        pk = np.random.rand(128, 12)
        ds_pdfs = xr.Dataset(
            data_vars={
                'xk': (('bins', 'month'), xk),
                'pk': (('bins', 'month'), pk)
            },
            coords={
                'bins': range(0, 128),
                'month': range(1, 13),
                'lat': [coords[0]],
                'lon': [coords[1]]
            })
        ds_pdfs = ds_pdfs.sel(lat=coords[0], lon=coords[1])
        params = {'tilt': 35, 'azim': 180, 'tracking': 0, 'capacity': 1000}
        # The context manager shuts the manager's server process down at the
        # end of every iteration instead of leaking one process per frequency.
        with multiprocessing.Manager() as manager:
            shr_mem = manager.list([None] * data_l)
            prog_mem = manager.list([data_l])
            pre.resample_for_gsee(ds, freq[0], params, i, coords, shr_mem,
                                  prog_mem, ds_pdfs)
            # Fetch a local copy while the manager is still alive.
            piece = shr_mem[i]
        shr_obj = piece.resample(time=freq).pad()
        assert isinstance(shr_obj, xr.Dataset)
        assert len(shr_obj.data_vars) == 1
        assert 'pv' in shr_obj.data_vars
        assert shr_obj.sizes['time'] == len(ds['global_horizontal'])
        assert shr_obj.sizes['lat'] == 1
        assert shr_obj.sizes['lon'] == 1
        assert np.array_equal(ds['time'].values, shr_obj['time'].values)
        assert shr_obj['pv'].sum() == pytest.approx(expected_results[freq][0],
                                                    abs=1e-5)
        assert np.nanmean(shr_obj['pv'].values) == pytest.approx(
            expected_results[freq][1], abs=1e-5)
def test_resample_for_gsee_with_pdfs():
    """Structural check of ``pre.resample_for_gsee`` with PDFs.

    For annual and monthly input, a single-point dataset with 48 random
    values and a random PDF dataset are built and ``pre.resample_for_gsee``
    is called once.  Only the structure of the entry found at index ``i`` of
    the shared list is checked (type, variables, dimension sizes, time axis);
    no numerical values are asserted.
    """
    # Seed the global NumPy RNG so that the randomly chosen index and the
    # random input data are the same on every run; a failure can then be
    # reproduced.
    np.random.seed(222)
    n_steps = 48
    coords = (45, 8.5)
    for freq in ['AS', 'MS']:
        i = np.random.randint(0, n_steps)
        time_index = pd.date_range(start='2000-01-01',
                                   periods=n_steps,
                                   freq=freq)
        ds = xr.Dataset(
            data_vars={
                'global_horizontal': (('time'), np.random.rand(n_steps) / 2)
            },
            coords={
                'time': time_index,
                'lat': [coords[0]],
                'lon': [coords[1]]
            })
        ds = ds.sel(lat=coords[0], lon=coords[1])
        ds_pdfs = xr.Dataset(
            data_vars={
                'xk': (('bins', 'month'), 10 * np.random.rand(128, 12) / 2),
                'pk': (('bins', 'month'), np.random.rand(128, 12))
            },
            coords={
                'bins': range(0, 128),
                'month': range(1, 13),
                'lat': [coords[0]],
                'lon': [coords[1]]
            })
        ds_pdfs = ds_pdfs.sel(lat=coords[0], lon=coords[1])
        params = {'tilt': 35, 'azim': 180, 'tracking': 0, 'capacity': 1000}
        # The context manager shuts the manager's server process down at the
        # end of every iteration instead of leaking one process per frequency.
        with multiprocessing.Manager() as manager:
            shr_mem = manager.list([None] * n_steps)
            prog_mem = manager.list([n_steps])

            pre.resample_for_gsee(ds, freq[0], params, i, coords, shr_mem,
                                  prog_mem, ds_pdfs)

            # Fetch a local copy while the manager is still alive.
            piece = shr_mem[i]

        shr_obj = piece.resample(time=freq).pad()
        assert isinstance(shr_obj, xr.Dataset)
        assert len(shr_obj.data_vars) == 1
        assert 'pv' in shr_obj.data_vars
        assert shr_obj.sizes['time'] == len(ds['global_horizontal'])
        assert shr_obj.sizes['lat'] == 1
        assert shr_obj.sizes['lon'] == 1
        assert np.array_equal(ds['time'].values, shr_obj['time'].values)
Example #5
0
def _nearest_pdf_coords(pdfs, coord_list):
    """Return, for every (lat, lon) in `coord_list`, the nearest grid point of `pdfs`."""
    pdf_coords = list(product(pdfs['lat'].values, pdfs['lon'].values))
    tree = spatial.KDTree(pdf_coords)
    # Querying one point at a time yields a scalar index, which avoids
    # converting a one-element array to int (deprecated in recent NumPy).
    return [pdf_coords[int(tree.query(point)[1])] for point in coord_list]


def run_interface_from_dataset(
    data: xr.Dataset,
    params: dict,
    frequency='detect',
    pdfs_file='builtin',
    num_cores=multiprocessing.cpu_count()) -> xr.Dataset:
    """
    Run `resample_for_gsee` for every (lat, lon) grid point of `data` and
    merge the per-point results into a single dataset.

    Parameters
    ----------
    data: xarray Dataset
        containing at least one variable 'global_horizontal' with mean
        global horizontal irradiance in W/m2.
        Optional variables: 'diffuse_fraction', 'temperature' in °C
    params: dict
        Parameters for GSEE, i.e. 'tilt', 'azim',
        'tracking', 'capacity'. tilt can be a function depending on
        latitude -- see example input. Tracking can be 0, 1, 2 for no
        tracking, 1-axis tracking, 2-axis tracking.
    frequency: str, optional
        Frequency of the input data. One of ['A', 'S', 'M', 'D', 'H'],
        for annual, seasonal, monthly, daily, hourly. Defaults to 'detect',
        which attempts to automatically detect the correct frequency.
    pdfs_file: str, optional
        Path to a NetCDF file with probability density functions to use
        for each month. Only for annual, seasonal and monthly data.
        Default is 'builtin', which automatically downloads and uses a
        built-in global PDF based on MERRA-2 data. Set to None to disable.
    num_cores: int, optional
        Number of cores that should be used for the computation.
        Default is all available cores.

    Returns
    -------
    xarray Dataset
        PV power output with dimensions ordered ('time', 'lat', 'lon').
        The 'unit' attribute of the 'pv' variable is set to 'Wh' if
        frequency is 'H', else to 'Wh/day'.

    Raises
    ------
    ValueError
        If `pdfs_file` is not None while the frequency is not one of
        'A', 'S' or 'M'.

    Notes
    -----
    The 'time' coordinate of `data` is overwritten in place with the output
    of `_mod_time_dim`, so the caller's dataset is modified.

    """
    frequency = _detect_frequency(data, frequency)

    # Produce list of coordinates of all grid points to iterate over
    coord_list = list(product(data['lat'].values, data['lon'].values))

    # Modify time dimension so it fits the requirements of
    # the "resample_for_gsee" function
    data['time'] = _mod_time_dim(pd.to_datetime(data['time'].values),
                                 frequency)

    # The context manager shuts the manager's server process down once the
    # results have been copied out (also when an exception is raised).
    with multiprocessing.Manager() as manager:
        # Shareable list with a place for every coordinate in the grid
        shr_mem = manager.list([None] * len(coord_list))
        # Store length of coordinate list in prog_mem to draw
        # the progress bar dynamically
        prog_mem = manager.list()
        prog_mem.append(len(coord_list))

        start = time.time()

        pdfs = None
        coord_list_nn = None
        if pdfs_file is not None:
            if frequency not in ['A', 'S', 'M']:
                raise ValueError(
                    'For frequencies other than "A", "S", or "M", '
                    '`pdfs_file` must be explicitly set to None.')
            if pdfs_file == 'builtin':
                pdfs = xr.open_dataset(util.return_pdf_path(), autoclose=True)
            else:
                pdfs = xr.open_dataset(pdfs_file, autoclose=True)
            coord_list_nn = _nearest_pdf_coords(pdfs, coord_list)

        def pdfs_at(index):
            # PDFs of the grid point closest to coord_list[index], or None
            # when PDFs are disabled.
            if pdfs is None:
                return None
            lat_nn, lon_nn = coord_list_nn[index]
            return pdfs.sel(lat=lat_nn, lon=lon_nn)

        if num_cores > 1:
            print('Parallel mode: {} cores'.format(num_cores))
            from joblib import Parallel, delayed
            Parallel(n_jobs=num_cores)(
                delayed(resample_for_gsee)(
                    data.sel(lat=coords[0], lon=coords[1]), frequency, params,
                    i, coords, shr_mem, prog_mem, pdfs_at(i))
                for i, coords in enumerate(coord_list))
        else:
            print('Single core mode')
            for i, coords in enumerate(coord_list):
                resample_for_gsee(
                    data.sel(lat=coords[0], lon=coords[1]), frequency, params,
                    i, coords, shr_mem, prog_mem, pdfs_at(i))

        end = time.time()
        print('\nComputation part took: {} seconds'.format(
            str(round(end - start, 2))))

        # Copy the results out of the shared list while the manager is still
        # alive; slots that do not hold a dataset are skipped.
        pieces = [piece for piece in shr_mem if isinstance(piece, xr.Dataset)]

    # Stitch together the data in one merge call instead of merging pairwise,
    # which re-aligns the growing result for every grid point.
    result = xr.merge([xr.Dataset()] + pieces)
    result = result.transpose('time', 'lat', 'lon')
    result['time'] = data['time']
    if frequency == 'H':
        result['pv'].attrs['unit'] = 'Wh'
    elif frequency in ['A', 'S', 'M', 'D']:
        result['pv'].attrs['unit'] = 'Wh/day'

    return result
def test_resample_for_gsee():
    """Run ``pre.resample_for_gsee`` without PDFs on annual, daily and hourly input.

    For each frequency a single-point input dataset with 48 time steps is
    built and ``pre.resample_for_gsee`` is called once.  The entry found at
    index ``i`` of the shared list afterwards is checked for its structure and
    for the expected sum and mean of its ``pv`` variable.
    """
    data_l = 48
    # Input series for the annual and daily cases.
    linear_data = np.linspace(100, 800, data_l)
    # Input series for the hourly case: 48 hand-written values.
    hourly_data = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        4.9, 87.9, 231.2, 385.6, 478.1, 507.1,
        580.3, 630.3, 508.5, 316.1, 208.1, 80.9,
        3.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 5.3,
        72.9, 121.3, 164.3, 358.5, 854.5, 904.0,
        938.0, 917.0, 844.3, 551.8, 519.8, 454.3,
        205.8, 70.0, 4.4, 0.0, 0.0, 0.0,
    ]
    # freq -> (expected sum of "pv", expected nan-mean of "pv")
    expected_results = {
        "AS": (366112.891239, 7789.635983),
        "D": (384515.728381, 8010.744341),
        "H": (15306.762448, 318.8908843),
    }
    coords = (45, 8.5)
    i = 12
    for freq in ["AS", "D", "H"]:
        # Choose the series per frequency rather than overwriting a shared
        # variable, so the test does not depend on "H" being iterated last.
        data = hourly_data if freq == "H" else linear_data
        ds = xr.Dataset(
            data_vars={"global_horizontal": (("time"), data)},
            coords={
                "time": pd.date_range(start="2000-01-01", periods=data_l, freq=freq),
                "lat": [coords[0]],
                "lon": [coords[1]],
            },
        ).sel(lat=coords[0], lon=coords[1])
        params = {
            "tilt": 35,
            "azim": 180,
            "tracking": 0,
            "capacity": 1000,
            "use_inverter": False,
        }
        # The context manager shuts the manager's server process down at the
        # end of every iteration instead of leaking one process per frequency.
        with multiprocessing.Manager() as manager:
            shr_mem = manager.list([None] * data_l)
            prog_mem = manager.list([data_l])

            pre.resample_for_gsee(ds, freq[0], params, i, coords, shr_mem, prog_mem)

            # Fetch a local copy while the manager is still alive.
            piece = shr_mem[i]

        shr_obj = piece.resample(time=freq).pad()
        assert isinstance(shr_obj, xr.Dataset)
        assert len(shr_obj.data_vars) == 1
        assert "pv" in shr_obj.data_vars
        assert shr_obj.sizes["time"] == len(ds["global_horizontal"])
        assert shr_obj.sizes["lat"] == 1
        assert shr_obj.sizes["lon"] == 1
        assert np.array_equal(ds["time"].values, shr_obj["time"].values)
        assert shr_obj["pv"].sum() == pytest.approx(expected_results[freq][0], abs=1e-5)
        assert np.nanmean(shr_obj["pv"].values) == pytest.approx(
            expected_results[freq][1], abs=1e-5
        )