Code example #1
 def _expand_tiepoint_array_5km(self, arr, lines, cols):
     arr = da.repeat(arr, lines * 2, axis=1)
     arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)), cols, axis=1)
     if self.cscan_full_width == 271:
         return da.hstack((arr[:, :2], arr, arr[:, -2:]))
     else:
         return da.hstack((arr[:, :2], arr, arr[:, -5:], arr[:, -2:]))
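
Note: a minimal, hypothetical sketch (not from satpy) of the pattern the tiepoint helpers above rely on: calling da.repeat once per axis so that every coarse tiepoint value fills a lines x cols block of the full-resolution grid.

import numpy as np
import dask.array as da

tie = da.from_array(np.arange(6, dtype=float).reshape(2, 3), chunks=(2, 3))
lines, cols = 2, 4
# Repeat rows by `lines` and columns by `cols`; each tiepoint becomes a 2x4 block.
expanded = da.repeat(da.repeat(tie, lines, axis=0), cols, axis=1)
print(expanded.shape)  # (4, 12)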
Code example #2
 def _expand_tiepoint_array_1km(self, arr, lines, cols):
     arr = da.repeat(arr, lines, axis=1)
     arr = da.concatenate(
         (arr[:, :lines // 2, :], arr, arr[:, -(lines // 2):, :]), axis=1)
     arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)),
                     cols,
                     axis=1)
     return da.hstack((arr, arr[:, -cols:]))
Code example #3
 def _expand_tiepoint_array_5km(self, arr, lines, cols):
     arr = da.repeat(arr, lines * 2, axis=1)
     arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)), cols, axis=1)
     factor = self.fscan_width // self.cscan_width
     if self.cscan_full_width == 271:
         return da.hstack((arr[:, :2 * factor], arr, arr[:, -2 * factor:]))
     else:
         return da.hstack((arr[:, :2 * factor], arr, arr[:, -self.fscan_width:], arr[:, -2 * factor:]))
Code example #4
File: test_creation.py Project: caseyclements/dask
def test_repeat():
    x = np.random.random((10, 11, 13))
    d = da.from_array(x, chunks=(4, 5, 3))

    repeats = [1, 2, 5]
    axes = [-3, -2, -1, 0, 1, 2]

    for r in repeats:
        for a in axes:
            assert_eq(x.repeat(r, axis=a), d.repeat(r, axis=a))

    assert_eq(d.repeat(2, 0), da.repeat(d, 2, 0))

    with pytest.raises(NotImplementedError):
        da.repeat(d, np.arange(10))

    with pytest.raises(NotImplementedError):
        da.repeat(d, 2, None)

    with pytest.raises(NotImplementedError):
        da.repeat(d, 2)

    for invalid_axis in [3, -4]:
        with pytest.raises(ValueError):
            da.repeat(d, 2, axis=invalid_axis)

    x = np.arange(5)
    d = da.arange(5, chunks=(2,))

    assert_eq(x.repeat(3), d.repeat(3))

    for r in [1, 2, 3, 4]:
        assert all(concat(d.repeat(r).chunks))
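
As the test above suggests, dask's da.repeat only accepts an integer repeats value and, for arrays with more than one dimension, an explicit axis. A minimal sketch of the supported call:

import numpy as np
import dask.array as da

d = da.from_array(np.arange(6).reshape(2, 3), chunks=(2, 3))
print(da.repeat(d, 2, axis=1).compute())
# [[0 0 1 1 2 2]
#  [3 3 4 4 5 5]]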
Code example #5
def test_repeat():
    x = np.random.random((10, 11, 13))
    d = da.from_array(x, chunks=(4, 5, 3))

    repeats = [0, 1, 2, 5]
    axes = [-3, -2, -1, 0, 1, 2]

    for r in repeats:
        for a in axes:
            assert_eq(x.repeat(r, axis=a), d.repeat(r, axis=a))

    assert_eq(d.repeat(2, 0), da.repeat(d, 2, 0))

    with pytest.raises(NotImplementedError):
        da.repeat(d, np.arange(10))

    with pytest.raises(NotImplementedError):
        da.repeat(d, 2, None)

    with pytest.raises(NotImplementedError):
        da.repeat(d, 2)

    for invalid_axis in [3, -4]:
        with pytest.raises(ValueError):
            da.repeat(d, 2, axis=invalid_axis)

    x = np.arange(5)
    d = da.arange(5, chunks=(2, ))

    assert_eq(x.repeat(3), d.repeat(3))

    for r in [1, 2, 3, 4]:
        assert all(concat(d.repeat(r).chunks))
Code example #6
 def _expand_tiepoint_array_5km(self, arr, lines, cols):
     if self.level == 2:  # Repeat the last column to complete L2 data
         arr = da.dstack([arr, arr[:, :, -1]])
     arr = da.repeat(arr, lines * 2, axis=1)
     if self.level == 1:
         arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)),
                         cols,
                         axis=1)
     elif self.level == 2:
         arr = da.repeat(arr.reshape((-1, self.cscan_full_width)),
                         cols,
                         axis=1)
     return da.hstack((arr[:, :2], arr, arr[:, -2:]))
Code example #7
File: test_ninjotiff.py Project: pytroll/pyninjotiff
def test_write_bw_inverted_ir_fill():
    """Test saving a BW image with transparency."""
    area = STEREOGRAPHIC_AREA
    scale = 1.0 / 120
    offset = 70.0 / 120
    attrs = dict([('resolution', 1050),
                  ('polarization', None),
                  ('platform_name', 'NOAA-18'),
                  ('sensor', 'avhrr-3'),
                  ('units', 'K'),
                  ('name', '4'),
                  ('level', None),
                  ('modifiers', ()),
                  ('wavelength', (10.3, 10.8, 11.3)),
                  ('calibration', 'brightness_temperature'),
                  ('start_time', TIME - datetime.timedelta(minutes=35)),
                  ('end_time', TIME - datetime.timedelta(minutes=30)),
                  ('area', area),
                  ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    kwargs = {'ch_min_measurement_unit': np.array([-70]),
              'ch_max_measurement_unit': np.array([50]),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 900015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': 'C', 'nbits': 8}

    data1 = da.tile(da.repeat(da.arange(4, chunks=1024) /
                              3.0, 256), 256).reshape((1, 256, 1024))
    datanan = da.ones((1, 256, 1024), chunks=1024) * np.nan
    data2 = da.tile(da.repeat(da.arange(4, chunks=1024) /
                              3.0, 256), 512).reshape((1, 512, 1024))
    data = da.concatenate((data1, datanan, data2), axis=1)
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=[
                        'bands', 'y', 'x'], attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value
        for i in range(3):
            assert(np.all(np.array(colormap[i * 256:(i + 1) * 256]) == np.arange(255, -1, -1) * 256))
        assert(np.all(res[0, ::256] == np.array([1,  86, 170, 255])))
        assert(np.all(res[256, :] == 0))
Code example #8
File: test_fci_l1c_nc.py Project: joleenf/satpy
 def _get_test_calib_for_channel_vis(self, chroot, meas):
     xrda = xr.DataArray
     data = {}
     data["state/celestial/earth_sun_distance"] = xrda(
         da.repeat(da.array([149597870.7]), 6000))
     data[meas + "/channel_effective_solar_irradiance"] = xrda(50)
     return data
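
For reference, a small standalone sketch of the same trick used in this fixture: repeating a length-1 dask array is a cheap way to build a constant vector (the value and length below are only illustrative).

import dask.array as da

distance = da.repeat(da.array([149597870.7]), 6000)  # constant 1-D array
print(distance.shape)  # (6000,)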
Code example #9
    def expand_reduce(cls, d_arr, repeats):
        if not isinstance(d_arr, da.Array):
            d_arr = da.from_array(d_arr, chunks=CHUNK_SIZE)
        if all(x == 1 for x in repeats.values()):
            return d_arr
        elif all(x >= 1 for x in repeats.values()):
            # rechunk so new chunks are the same size as old chunks
            c_size = max(x[0] for x in d_arr.chunks)

            def _calc_chunks(c, c_size):
                whole_chunks = [c_size] * int(sum(c) // c_size)
                remaining = sum(c) - sum(whole_chunks)
                if remaining:
                    whole_chunks += [remaining]
                return tuple(whole_chunks)
            new_chunks = [_calc_chunks(x, int(c_size // repeats[axis]))
                          for axis, x in enumerate(d_arr.chunks)]
            d_arr = d_arr.rechunk(new_chunks)

            for axis, factor in repeats.items():
                if not factor.is_integer():
                    raise ValueError("Expand factor must be a whole number")
                d_arr = da.repeat(d_arr, int(factor), axis=axis)
            return d_arr
        elif all(x <= 1 for x in repeats.values()):
            # reduce
            y_size = 1. / repeats[0]
            x_size = 1. / repeats[1]
            return cls.aggregate(d_arr, y_size, x_size)
        else:
            raise ValueError("Must either expand or reduce in both "
                             "directions")
Code example #10
File: resample.py Project: davidh-ssec/satpy
    def expand_reduce(cls, d_arr, repeats):
        if not isinstance(d_arr, da.Array):
            d_arr = da.from_array(d_arr, chunks=CHUNK_SIZE)
        if all(x == 1 for x in repeats.values()):
            return d_arr
        elif all(x >= 1 for x in repeats.values()):
            # rechunk so new chunks are the same size as old chunks
            c_size = max(x[0] for x in d_arr.chunks)

            def _calc_chunks(c, c_size):
                whole_chunks = [c_size] * int(sum(c) // c_size)
                remaining = sum(c) - sum(whole_chunks)
                if remaining:
                    whole_chunks += [remaining]
                return tuple(whole_chunks)
            new_chunks = [_calc_chunks(x, int(c_size // repeats[axis]))
                          for axis, x in enumerate(d_arr.chunks)]
            d_arr = d_arr.rechunk(new_chunks)

            for axis, factor in repeats.items():
                if not factor.is_integer():
                    raise ValueError("Expand factor must be a whole number")
                d_arr = da.repeat(d_arr, int(factor), axis=axis)
            return d_arr
        elif all(x <= 1 for x in repeats.values()):
            # reduce
            y_size = 1. / repeats[0]
            x_size = 1. / repeats[1]
            return cls.aggregate(d_arr, y_size, x_size)
        else:
            raise ValueError("Must either expand or reduce in both "
                             "directions")
Code example #11
File: test_ninjotiff.py Project: pytroll/pyninjotiff
def test_write_rgb_classified():
    """Test saving a transparent RGB."""
    area = STEREOGRAPHIC_AREA

    x_size, y_size = 1024, 1024
    arr = np.zeros((3, y_size, x_size))

    attrs = dict([('platform_name', 'NOAA-18'),
                  ('resolution', 1050),
                  ('polarization', None),
                  ('start_time', TIME - datetime.timedelta(minutes=65)),
                  ('end_time', TIME - datetime.timedelta(minutes=60)),
                  ('level', None),
                  ('sensor', 'avhrr-3'),
                  ('ancillary_variables', []),
                  ('area', area),
                  ('wavelength', None),
                  ('optional_datasets', []),
                  ('standard_name', 'overview'),
                  ('name', 'overview'),
                  ('prerequisites', [0.6, 0.8, 10.8]),
                  ('optional_prerequisites', []),
                  ('calibration', None),
                  ('modifiers', None),
                  ('mode', 'P')])

    kwargs = {'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 1700015, 'data_cat': 'PPRN', 'data_source': 'SMHI', 'nbits': 8}

    data1 = da.tile(da.repeat(da.arange(4, chunks=1024), 256), 256).reshape((1, 256, 1024))
    datanan = da.ones((1, 256, 1024), chunks=1024) * 4
    data2 = da.tile(da.repeat(da.arange(4, chunks=1024), 256), 512).reshape((1, 512, 1024))
    data = da.concatenate((data1, datanan, data2), axis=1)
    data = xr.DataArray(data, coords={'bands': ['P']}, dims=['bands', 'y', 'x'], attrs=attrs)

    img = XRImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        res = tif[0].asarray()
        for idx in range(3):
            np.testing.assert_allclose(res[:, :, idx], np.round(
                np.nan_to_num(arr[idx, :, :]) * 255).astype(np.uint8))
        np.testing.assert_allclose(res[:, :, 3] == 0, np.isnan(arr[0, :, :]))
Code example #12
File: blockwise.py Project: hungyiwu/blockwise
def repeat_block(image, block_shape):
    """
    da.repeat for n-dim.
    """
    rep = image.copy()
    for ax in range(image.ndim):
        rep = da.repeat(rep, repeats=block_shape[ax], axis=ax)
    return rep
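
A possible usage of repeat_block as defined above (the input shapes are made up): each element of the input is expanded into a block_shape-sized block along every dimension.

import numpy as np
import dask.array as da

image = da.from_array(np.arange(4).reshape(2, 2), chunks=(2, 2))
upsampled = repeat_block(image, block_shape=(2, 3))  # rows x2, columns x3
print(upsampled.shape)  # (4, 6)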
Code example #13
def missing_spectrum(  # pylint: disable=too-many-locals
        df: DataArray, bins: int) -> Dict[str, da.Array]:
    """Calculate a missing spectrum for each column."""

    nrows, ncols = df.shape
    data = df.nulls

    if nrows > 1:
        num_bins = min(bins, nrows - 1)
        bin_size = nrows // num_bins
        chunk_size = min(1024 * 1024 * 128, nrows *
                         ncols)  # max 1024 x 1024 x 128 Bytes bool values
        nbins_per_chunk = max(chunk_size // (bin_size * data.shape[1]), 1)
        chunk_size = nbins_per_chunk * bin_size
        data = data.rechunk((chunk_size, None))
        sep = nrows // chunk_size * chunk_size
    else:
        # avoid division or modulo by zero
        bin_size = 1
        nbins_per_chunk = 1
        chunk_size = 1
        data = data.rechunk((chunk_size, None))
        sep = 1

    spectrum_missing_percs = data[:sep].map_blocks(
        missing_perc_blockwise(bin_size),
        chunks=(nbins_per_chunk, *data.chunksize[1:]),
        dtype=float,
    )

    # calculation for the last chunk
    if sep != nrows:
        spectrum_missing_percs_remain = data[sep:].map_blocks(
            missing_perc_blockwise(bin_size),
            chunks=(int(np.ceil((nrows - sep) / bin_size)), *data.shape[1:]),
            dtype=float,
        )
        spectrum_missing_percs = da.concatenate(
            [spectrum_missing_percs, spectrum_missing_percs_remain], axis=0)

    num_bins = spectrum_missing_percs.shape[0]

    locs0 = da.arange(num_bins) * bin_size
    locs1 = da.minimum(locs0 + bin_size, nrows)
    locs_middle = locs0 + bin_size / 2

    return {
        "column":
        da.repeat(da.from_array(df.columns.values, (1, )), num_bins),
        "location":
        da.tile(locs_middle, ncols),
        "missing_rate":
        spectrum_missing_percs.T.ravel().rechunk(locs_middle.shape[0]),
        "loc_start":
        da.tile(locs0, ncols),
        "loc_end":
        da.tile(locs1, ncols),
    }
Code example #14
def const_features_for_single_grid_single_file(grid_indx, wind_grid_indx, data):
    client = Client()
    dims = data['no2'].shape
    ntime = dims[0] - 1
    nvel = dims[2]
    data_dict = dict()
    data_hours = da.array(data['hour'][1:])
    data_dict['hour'] = da.repeat(data_hours[:, :], nvel, axis=1)
    data_dict['date'] = da.zeros((ntime, nvel)) + da.mean(data['date'][:])
    data_dict['date'] = data_dict['date']
    cum_ic_flash = da.array(data['IC_FLASHCOUNT'][:, grid_indx, :])
    cum_cg_flash = da.array(data['CG_FLASHCOUNT'][:, grid_indx, :])
    data_dict['IC_FLASHCOUNT'] = da.repeat(cum_ic_flash[1:, :] - cum_ic_flash[:-1, :], nvel, axis=1)
    data_dict['CG_FLASHCOUNT'] = da.repeat(cum_cg_flash[1:, :] - cum_cg_flash[:-1, :], nvel, axis=1)
    e_no_lower = da.array(data['E_NO'])[1:, grid_indx, :]
    e_no_upper = da.zeros((ntime, nvel - e_no_lower.shape[1]))
    data_dict['E_NO'] = da.concatenate([e_no_lower, e_no_upper], axis=1)
    data_dict['U'] = (data['U'][1:, wind_grid_indx[0][0], :] + data['U'][1:, wind_grid_indx[0][1], :])/2
    data_dict['V'] = (data['V'][1:, wind_grid_indx[1][0], :] + data['V'][1:, wind_grid_indx[1][1], :])/2

    match_vars = ['no2', 'pres', 'temp', 'CLDFRA']
    print('Variables read directly from wrf: {}'.format(match_vars[:]))
    for var in match_vars:
        data_dict[var] = da.array(data[var])[1:, grid_indx, :]

    reduce_dim_vars = ['elev', 'W']
    print('Variables average vertically: {}'.format(reduce_dim_vars[:]))
    for var in reduce_dim_vars:
        this_value = da.array(data[var])[1:, grid_indx, :]
        data_dict[var] = (this_value[:, 1:] + this_value[:, :-1]) / 2

    add_dim_vars = ['COSZEN', 'PBLH', 'LAI', 'HGT', 'SWDOWN', 'GLW']
    print('Variables add vertical layers: {}'.format(add_dim_vars[:]))

    for var in add_dim_vars:
        this_value = da.array(data[var])[1:, grid_indx, :]
        data_dict[var] = da.repeat(this_value, nvel, axis=1)

    print('Key of dict:{}'.format(data_dict.keys()))
    save_arr = []
    for var in data_dict.keys():
        data_dict[var] = data_dict[var].flatten()
        save_arr.append(data_dict[var])
    save_arr = da.array(save_arr).compute()
    return save_arr
Code example #15
def test_write_bw():
    """Test saving a BW image."""
    from pyninjotiff.ninjotiff import save
    from pyninjotiff.tifffile import TiffFile

    area = FakeArea(
        {
            'ellps': 'WGS84',
            'lat_0': '90.0',
            'lat_ts': '60.0',
            'lon_0': '0.0',
            'proj': 'stere'
        }, (-1000000.0, -4500000.0, 2072000.0, -1428000.0), 1024, 1024)
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', '%'), ('name', '1'), ('level', None),
                  ('modifiers', ()), ('wavelength', (10.3, 10.8, 11.3)),
                  ('calibration', 'brightness_temperature'),
                  ('start_time', TIME - datetime.timedelta(minutes=5)),
                  ('end_time', TIME), ('area', area),
                  ('ancillary_variables', []),
                  ('enhancement_history', [{
                      'offset': offset,
                      'scale': scale
                  }])])

    kwargs = {
        'ch_min_measurement_unit': np.array([0]),
        'ch_max_measurement_unit': np.array([120]),
        'compute': True,
        'fill_value': None,
        'sat_id': 6300014,
        'chan_id': 100015,
        'data_cat': 'P**N',
        'data_source': 'SMHI',
        'physic_unit': '%',
        'nbits': 8
    }

    data = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                   1024).reshape((1, 1024, 1024))
    data = xr.DataArray(data,
                        coords={'bands': ['L']},
                        dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        res = tif[0].asarray()
        assert (np.allclose(res[0, 0, ::256],
                            np.array([256, 22016, 43520, 65280])))
Code example #16
def missing_spectrum(  # pylint: disable=too-many-locals
        data: da.Array, cols: np.ndarray, bins: int) -> dd.DataFrame:
    """
    Calculate a missing spectrum for each column
    """
    nrows, ncols = data.shape
    num_bins = min(bins, nrows - 1)
    bin_size = nrows // num_bins
    chunk_size = min(1024 * 1024 * 128,
                     nrows * ncols)  # max 1024 x 1024 x 128 Bytes bool values
    nbins_per_chunk = max(chunk_size // (bin_size * data.shape[1]), 1)

    chunk_size = nbins_per_chunk * bin_size

    data = data.rechunk((chunk_size, None))

    sep = nrows // chunk_size * chunk_size
    spectrum_missing_percs = data[:sep].map_blocks(
        missing_perc_blockwise(bin_size),
        chunks=(nbins_per_chunk, *data.shape[1:]),
        dtype=float,
    )

    # calculation for the last chunk
    if sep != nrows:
        spectrum_missing_percs_remain = data[sep:].map_blocks(
            missing_perc_blockwise(bin_size),
            chunks=(int(np.ceil((nrows - sep) / bin_size)), *data.shape[1:]),
            dtype=float,
        )
        spectrum_missing_percs = da.concatenate(
            [spectrum_missing_percs, spectrum_missing_percs_remain], axis=0)

    num_bins = spectrum_missing_percs.shape[0]

    locs0 = da.arange(num_bins) * bin_size
    locs1 = da.minimum(locs0 + bin_size, nrows)
    locs_middle = locs0 + bin_size / 2

    df = dd.from_dask_array(
        da.repeat(da.from_array(cols, (1, )), num_bins),
        columns=["column"],
    )

    df = df.assign(
        location=da.tile(locs_middle, ncols),
        missing_rate=spectrum_missing_percs.T.ravel().rechunk(
            locs_middle.shape[0]),
        loc_start=da.tile(locs0, ncols),
        loc_end=da.tile(locs1, ncols),
    )

    return df
Code example #17
    def scale_swath_data(self, data, scaling_factors):
        """Scale swath data using scaling factors and offsets.

        Multi-granule (a.k.a. aggregated) files will have more than the usual two values.
        """
        num_grans = len(scaling_factors) // 2
        gran_size = data.shape[0] // num_grans
        factors = scaling_factors.where(scaling_factors > -999)
        factors = factors.data.reshape((-1, 2))
        factors = xr.DataArray(da.repeat(factors, gran_size, axis=0),
                               dims=(data.dims[0], 'factors'))
        data = data * factors[:, 0] + factors[:, 1]
        return data
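
A minimal sketch (with invented shapes) of the per-granule broadcasting step above: da.repeat along axis 0 gives every scan line in a granule that granule's (scale, offset) pair.

import numpy as np
import dask.array as da

factors = da.from_array(np.array([[2.0, 1.0], [3.0, 0.5]]), chunks=(2, 2))  # one row per granule
per_line = da.repeat(factors, 4, axis=0)  # assume 4 lines per granule
print(per_line.shape)  # (8, 2)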
Code example #18
File: test_ninjotiff.py Project: pytroll/pyninjotiff
def test_write_p():
    """Test saving an image in P mode.

    Values are 0, 1, 2, 3, 4; the palette is black, red, green, blue, gray.
    """
    area = STEREOGRAPHIC_AREA

    palette = [np.array((0, 0, 0, 1)),
               np.array((1, 0, 0, 1)),
               np.array((0, 1, 0, 1)),
               np.array((0, 0, 1, 1)),
               np.array((.5, .5, .5, 1)),
               ]
    attrs = dict([('resolution', 1050),
                  ('polarization', None),
                  ('platform_name', 'MSG'),
                  ('sensor', 'seviri'),
                  ("palette", palette),
                  ('name', 'msg_cloudtop_height'),
                  ('level', None),
                  ('modifiers', ()),
                  ('start_time', TIME - datetime.timedelta(minutes=85)),
                  ('end_time', TIME - datetime.timedelta(minutes=80)),
                  ('area', area),
                  ('ancillary_variables', [])])

    data = da.tile(da.repeat(da.arange(5, chunks=1024, dtype=np.uint8), 205)[:-1],
                   1024).reshape((1, 1024, 1024))[:, :1024]
    data = xr.DataArray(data, coords={'bands': ['P']}, dims=[
                        'bands', 'y', 'x'], attrs=attrs)
    kwargs = {'compute': True, 'fill_value': None, 'sat_id': 9000014,
              'chan_id': 1900015, 'data_cat': 'GPRN', 'data_source': 'SMHI',
              'physic_unit': 'NONE', "physic_value": "NONE",
              "description": "NWCSAF Cloud Top Height"}

    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        colormap, res = _load_file_values_with_colormap(filename)

        np.testing.assert_array_equal(res[0, ::205], [0, 1, 2, 3, 4])
        assert(len(colormap) == 768)
        for i, line in enumerate(palette):
            np.testing.assert_array_equal(colormap[i::256], (line[:3] * 255).astype(int))
Code example #19
File: test_ninjotiff.py Project: pytroll/pyninjotiff
def test_write_bw():
    """Test saving a BW image.

    Reflectances.
    """
    area = STEREOGRAPHIC_AREA
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050),
                  ('polarization', None),
                  ('platform_name', 'NOAA-18'),
                  ('sensor', 'avhrr-3'),
                  ('units', '%'),
                  ('name', '1'),
                  ('level', None),
                  ('modifiers', ()),
                  ('wavelength', (0.5, 0.6, 0.7)),
                  ('calibration', 'reflectance'),
                  ('start_time', TIME - datetime.timedelta(minutes=5)),
                  ('end_time', TIME),
                  ('area', area),
                  ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    kwargs = {'ch_min_measurement_unit': xr.DataArray(0),
              'ch_max_measurement_unit': xr.DataArray(120),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 100015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': '%', 'nbits': 8}

    data = da.tile(da.repeat(da.arange(4, chunks=1024) /
                             3.0, 256), 1024).reshape((1, 1024, 1024))
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=[
                        'bands', 'y', 'x'], attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value
        for i in range(3):
            assert(np.all(np.array(colormap[i * 256:(i + 1) * 256]) == np.arange(256) * 256))
        assert(np.all(res[0, ::256] == np.array([1,  86, 170, 255])))
Code example #20
File: compute.py Project: MatthieuRouland/dataprep
def missing_spectrum(df: dd.DataFrame, bins: int,
                     ncols: int) -> Tuple[dd.DataFrame, dd.DataFrame]:
    """
    Calculate a missing spectrum for each column
    """
    # pylint: disable=too-many-locals
    num_bins = min(bins, len(df) - 1)

    df = df.iloc[:, :ncols]
    cols = df.columns[:ncols]
    ncols = len(cols)
    nrows = len(df)
    chunk_size = len(df) // num_bins
    data = df.isnull().to_dask_array()
    data.compute_chunk_sizes()
    data = data.rechunk((chunk_size, None))

    notnull_counts = data.sum(axis=0) / data.shape[0]
    total_missing_percs = {
        col: notnull_counts[idx]
        for idx, col in enumerate(cols)
    }

    spectrum_missing_percs = data.map_blocks(missing_perc_blockwise,
                                             chunks=(1, data.shape[1]),
                                             dtype=float)
    nsegments = len(spectrum_missing_percs)

    locs0 = da.arange(nsegments) * chunk_size
    locs1 = da.minimum(locs0 + chunk_size, nrows)
    locs_middle = locs0 + chunk_size / 2

    df = dd.from_dask_array(
        da.repeat(da.from_array(cols.values, (1, )), nsegments),
        columns=["column"],
    )

    df = df.assign(
        location=da.tile(locs_middle, ncols),
        missing_rate=spectrum_missing_percs.T.ravel(),
        loc_start=da.tile(locs0, ncols),
        loc_end=da.tile(locs1, ncols),
    )

    return df, total_missing_percs
Code example #21
def main(argv=None):

    #     cluster = LocalCluster(dashboard_address=None)
    #     client = Client(cluster, memory_limit='{}GB'.format(FLAGS.memory_limit),
    #                     processes=False)

    K.set_floatx('float32')

    chunk_size = FLAGS.chunk_size

    # Read data set
    hdf5_file = h5py.File(FLAGS.data_file, 'r')
    images, labels, _ = hdf52dask(hdf5_file,
                                  FLAGS.group,
                                  chunk_size,
                                  shuffle=FLAGS.shuffle,
                                  seed=FLAGS.seed,
                                  pct=FLAGS.pct)
    n_images = images.shape[0]
    n_batches = int(np.ceil(n_images / float(FLAGS.batch_size)))

    # Data augmentation parameters
    daug_params_file = get_daug_scheme_path(FLAGS.daug_params, FLAGS.data_file)
    daug_params = yaml.load(open(daug_params_file, 'r'),
                            Loader=yaml.FullLoader)
    nodaug_params_file = get_daug_scheme_path('nodaug.yml', FLAGS.data_file)
    nodaug_params = yaml.load(open(nodaug_params_file, 'r'),
                              Loader=yaml.FullLoader)

    # Initialize the network model
    model_filename = FLAGS.model
    model = load_model(model_filename)

    # Print the model summary
    model.summary()

    # Get relevant layers
    if FLAGS.store_input:
        layer_regex = '({}|.*input.*)'.format(FLAGS.layer_regex)
    else:
        layer_regex = FLAGS.layer_regex

    layers = [
        layer.name for layer in model.layers
        if re.compile(layer_regex).match(layer.name)
    ]

    # Create batch generators
    n_daug_rep = FLAGS.n_daug_rep
    n_diff_per_batch = int(FLAGS.batch_size / n_daug_rep)
    image_gen_daug = get_generator(images, **daug_params)
    batch_gen_daug = batch_generator(image_gen_daug,
                                     images,
                                     labels,
                                     batch_size=n_diff_per_batch,
                                     aug_per_im=n_daug_rep,
                                     shuffle=False)
    image_gen_nodaug = get_generator(images, **nodaug_params)
    batch_gen_nodaug = batch_generator(image_gen_nodaug,
                                       images,
                                       labels,
                                       FLAGS.batch_size,
                                       aug_per_im=1,
                                       shuffle=False)

    # Outputs
    if FLAGS.output_dir == '-1':
        FLAGS.output_dir = os.path.dirname(FLAGS.model)

    output_hdf5 = h5py.File(
        os.path.join(FLAGS.output_dir, FLAGS.output_mse_matrix_hdf5), 'w')
    output_pickle = os.path.join(FLAGS.output_dir, FLAGS.output_pickle)
    df_init_idx = 0
    df = pd.DataFrame()

    # Iterate over the layers
    for layer_idx, layer_name in enumerate(layers):

        # Reload the model
        if layer_idx > 0:
            K.clear_session()
            model = load_model(model_filename)

        layer = model.get_layer(layer_name)

        # Rename input layer
        if re.compile('.*input.*').match(layer_name):
            layer_name = 'input'

        hdf5_layer = output_hdf5.create_group(layer_name)

        activation_function = K.function(
            [model.input, K.learning_phase()], [layer.output])

        print('\nComputing pairwise similarity at layer {}'.format(layer_name))

        # Compute activations of original data (without augmentation)
        a_nodaug_da = get_activations(activation_function, batch_gen_nodaug)
        a_nodaug_da = da.squeeze(a_nodaug_da)
        a_nodaug_da = da.rechunk(a_nodaug_da,
                                 (chunk_size, ) + (a_nodaug_da.shape[1:]))
        dim_activations = a_nodaug_da.shape[1]

        # Compute matrix of similarities
        r = da.reshape(da.sum(da.square(a_nodaug_da), axis=1), (-1, 1))
        mse_matrix = (r - 2 * da.dot(a_nodaug_da,
                                     da.transpose(a_nodaug_da)) \
                     + da.transpose(r)) / dim_activations

        # Compute activations with augmentation
        a_daug_da = get_activations(activation_function, batch_gen_daug)
        a_daug_da = da.rechunk(a_daug_da, (chunk_size, dim_activations, 1))

        # Compute similarity of augmentations with respect to the
        # activations of the original data
        a_nodaug_da = da.repeat(da.reshape(a_nodaug_da,
                                           a_nodaug_da.shape + (1, )),
                                repeats=n_daug_rep,
                                axis=2)
        a_nodaug_da = da.rechunk(a_nodaug_da, (chunk_size, dim_activations, 1))
        mse_daug = da.mean(da.square(a_nodaug_da - a_daug_da), axis=1)

        # Compute invariance score
        mse_sum = da.repeat(da.reshape(da.sum(mse_matrix, axis=1),
                                       (n_images, 1)),
                            repeats=n_daug_rep,
                            axis=1)
        mse_sum = da.rechunk(mse_sum, (chunk_size, 1))
        invariance = 1 - n_images * da.divide(mse_daug, mse_sum)

        print('Dimensionality activations: {}x{}x{}'.format(
            n_images, dim_activations, n_daug_rep))

        # Store HDF5 file
        if FLAGS.output_mse_matrix_hdf5:
            mse_matrix_ds = hdf5_layer.create_dataset(
                'mse_matrix',
                shape=mse_matrix.shape,
                chunks=mse_matrix.chunksize,
                dtype=K.floatx())
            mse_daug_ds = hdf5_layer.create_dataset('mse_daug',
                                                    shape=mse_daug.shape,
                                                    chunks=mse_daug.chunksize,
                                                    dtype=K.floatx())
            invariance_ds = hdf5_layer.create_dataset(
                'invariance',
                shape=invariance.shape,
                chunks=invariance.chunksize,
                dtype=K.floatx())
            time_init = time()
            with ProgressBar(dt=1):
                da.store([mse_matrix, mse_daug, invariance],
                         [mse_matrix_ds, mse_daug_ds, invariance_ds])
            time_end = time()
            print('Elapsed time: {}'.format(time_end - time_init))

            invariance = np.ravel(
                np.asarray(output_hdf5[layer_name]['invariance']))
        else:
            time_init = time()
            invariance = da.ravel(invariance).compute()
            time_end = time()
            print('Elapsed time: {}'.format(time_end - time_init))

        # Update pandas data frame for plotting
        df_end_idx = df_init_idx + n_images * n_daug_rep
        d = pd.DataFrame(
            {
                'Layer': layer_name,
                'sample': np.repeat(np.arange(n_images), n_daug_rep),
                'n_daug': np.tile(np.arange(n_daug_rep), n_images),
                'invariance': invariance
            },
            index=np.arange(df_init_idx, df_end_idx).tolist())
        df = df.append(d)
        df_init_idx += df_end_idx

    pickle.dump(df, open(output_pickle, 'wb'))
    output_hdf5.close()
Code example #22
def calibration_double_ended_wls(ds,
                                 st_label,
                                 ast_label,
                                 rst_label,
                                 rast_label,
                                 st_var,
                                 ast_var,
                                 rst_var,
                                 rast_var,
                                 calc_cov=True,
                                 solver='sparse',
                                 dtype32=False):
    """


    Parameters
    ----------
    ds : DataStore
    st_label
    ast_label
    rst_label
    rast_label
    st_var
    ast_var
    rst_var
    rast_var
    calc_cov
    solver : {'sparse', 'stats'}

    Returns
    -------

    """

    # x_alpha_set_zero=0.,
    # set one alpha for all times to zero
    # x_alpha_set_zeroi = np.argmin(np.abs(ds.x.data - x_alpha_set_zero))
    # x_alpha_set_zeroidata = np.arange(nt) * no + x_alpha_set_zeroi

    cal_ref = ds.ufunc_per_section(label=st_label,
                                   ref_temp_broadcasted=True,
                                   calc_per='all')

    st = ds.ufunc_per_section(label=st_label, calc_per='all')
    ast = ds.ufunc_per_section(label=ast_label, calc_per='all')
    rst = ds.ufunc_per_section(label=rst_label, calc_per='all')
    rast = ds.ufunc_per_section(label=rast_label, calc_per='all')
    z = ds.ufunc_per_section(label='x', calc_per='all')

    nx = z.size

    _xsorted = np.argsort(ds.x.data)
    _ypos = np.searchsorted(ds.x.data[_xsorted], z)
    x_index = _xsorted[_ypos]

    no, nt = ds[st_label].data.shape

    p0_est = np.asarray([482., 0.1] + nt * [1.4] + no * [0.])

    # Data for F and B temperature, 2 * nt * nx items
    data1 = da.repeat(1 / (cal_ref.T.ravel() + 273.15), 2)  # gamma
    # data2 = da.tile(np.array([0., -1.]), nt * nx)  # alphaint
    data2 = da.stack((da.zeros(nt * nx, chunks=nt * nx),
                      -da.ones(nt * nx, chunks=nt * nx))).T.ravel()
    # data3 = da.tile(np.array([-1., -1.]), nt * nx)  # C
    data3 = -da.ones(2 * nt * nx, chunks=2 * nt * nx)
    # data5 = da.tile(np.array([-1., 1.]), nt * nx)  # alph
    data5 = da.stack((-da.ones(nt * nx, chunks=nt * nx),
                      da.ones(nt * nx, chunks=nt * nx))).T.ravel()

    # Data for alpha, nt * no items
    # data6 = da.repeat(np.array([-0.5]), nt * no)  # alphaint
    data6 = da.ones(nt * no, dtype=float,
                    chunks=(nt * no, )) * -0.5  # alphaint
    data9 = da.ones(nt * no, dtype=float, chunks=(nt * no, ))  # alpha

    # alpha should start at zero. But then the sparse solver crashes
    # data9[x_alpha_set_zeroidata] = 0.

    data = da.concatenate([data1, data2, data3, data5, data6, data9]).compute()

    # Coords (irow, icol)
    coord1row = da.arange(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # gamma
    coord2row = da.arange(2 * nt * nx, dtype=int,
                          chunks=(nt * nx, ))  # alphaint
    coord3row = da.arange(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # C
    coord5row = da.arange(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # alpha

    coord6row = da.arange(2 * nt * nx,
                          2 * nt * nx + nt * no,
                          dtype=int,
                          chunks=(nt * no, ))  # alphaint
    coord9row = da.arange(2 * nt * nx,
                          2 * nt * nx + nt * no,
                          dtype=int,
                          chunks=(nt * no, ))  # alpha

    coord1col = da.zeros(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # gamma
    coord2col = da.ones(2 * nt * nx, dtype=int, chunks=(nt * nx, )) * (
        2 + nt + no - 1)  # alphaint
    coord3col = da.repeat(da.arange(nt, dtype=int, chunks=(nt, )) + 2,
                          2 * nx).rechunk(nt * nx)  # C
    coord5col = da.tile(np.repeat(x_index, 2) + nt + 2,
                        nt).rechunk(nt * nx)  # alpha

    coord6col = da.ones(nt * no, dtype=int,
                        chunks=(nt * no, ))  # * (2 + nt + no - 1)  # alphaint
    coord9col = da.tile(
        da.arange(no, dtype=int, chunks=(nt * no, )) + nt + 2, nt)  # alpha

    rows = [coord1row, coord2row, coord3row, coord5row, coord6row, coord9row]
    cols = [coord1col, coord2col, coord3col, coord5col, coord6col, coord9col]
    coords = (da.concatenate(rows).compute(), da.concatenate(cols).compute())

    # try scipy.sparse.bsr_matrix
    X = sp.coo_matrix((data, coords),
                      shape=(2 * nx * nt + nt * no, nt + 2 + no),
                      dtype=float,
                      copy=False)

    # Spooky way to interleave and ravel arrays in correct order. Works!
    y1F = da.log(st / ast).T.ravel()
    y1B = da.log(rst / rast).T.ravel()
    y1 = da.stack([y1F, y1B]).T.ravel()

    y2F = da.log(ds[st_label].data / ds[ast_label].data).T.ravel()
    y2B = da.log(ds[rst_label].data / ds[rast_label].data).T.ravel()
    y2 = (y2B - y2F) / 2
    y = da.concatenate([y1, y2]).compute()

    # Calculate the reciprocal of the variance (not std)
    w1F = (1 / st**2 * st_var + 1 / ast**2 * ast_var).T.ravel()
    w1B = (1 / rst**2 * rst_var + 1 / rast**2 * rast_var).T.ravel()
    w1 = da.stack([w1F, w1B]).T.ravel()

    w2 = (0.5 / ds[st_label].data**2 * st_var +
          0.5 / ds[ast_label].data**2 * ast_var +
          0.5 / ds[rst_label].data**2 * rst_var +
          0.5 / ds[rast_label].data**2 * rast_var).T.ravel()
    w = da.concatenate([w1, w2]).compute()

    if solver == 'sparse':
        p_sol, p_var, p_cov = wls_sparse(X,
                                         y,
                                         w=w,
                                         x0=p0_est,
                                         calc_cov=calc_cov,
                                         dtype32=dtype32)

    elif solver == 'stats':
        p_sol, p_var, p_cov = wls_stats(X, y, w=w, calc_cov=calc_cov)

    if calc_cov:
        return nt, z, p_sol, p_var, p_cov
    else:
        return nt, z, p_sol, p_var
Code example #23
def read_orig_file_from_wrf(filename):
    keep_indx = np.load('target_cells_index.npy')
    reader = csv.reader(open("surrouding_cells.csv", "r"))
    surr_arr = []
    for row in reader:
        this_arr = list(map(int, row[1].strip(']|[').split(',')))
        surr_arr.append(this_arr)
    surr_arr = np.array(surr_arr)[keep_indx, :]
    data = nc.Dataset(filename)
    labels = []

    for variable in data.variables:
        #print(variable + ":" + str(data[variable].shape))
        labels.append(variable)
    for i in range(0, 24):
        labels.append('anthro_surr_emis_{:02d}'.format(i))
        labels.append('lightning_surr_emis_{:02d}'.format(i))
    labels.append('total_lightning')
    data_dict = {label: [] for label in labels}
    extra_vars = ['xlon', 'xlat', 'hour', 'date', 'IC_FLASHCOUNT', 'CG_FLASHCOUNT', 'E_NO', 'U', 'V']
    #print('Variables require extra processing steps: {}'.format(extra_vars[:]))
    dims = data['no2'].shape
    ntime = dims[0]-1
    ngrid = len(keep_indx)
    nvel = dims[2]
    data_hours = da.array(data['hour'][1:], dtype='float32')
    data_dict['hour'] = da.repeat(da.repeat(data_hours[:, :, np.newaxis], ngrid, axis=1), nvel, axis=2)
    xlon = da.array(data['xlon'][:], dtype='float32').flatten()[np.newaxis, keep_indx, np.newaxis]
    data_dict['xlon'] = da.repeat(da.repeat(xlon, ntime, axis=0), nvel, axis=2)
    xlat = da.array(data['xlat'][:], dtype='float32').flatten()[np.newaxis, keep_indx, np.newaxis]
    data_dict['xlat'] = da.repeat(da.repeat(xlat, ntime, axis=0), nvel, axis=2)
    data_dict['date'] = da.zeros((ntime, ngrid, nvel)) + da.mean(data['date'][:], dtype='float32')
    data_dict['date'] = data_dict['date']
    cum_ic_flash = da.array(data['IC_FLASHCOUNT'][:], dtype='float32')
    cum_cg_flash = da.array(data['CG_FLASHCOUNT'][:], dtype='float32')
    ic_flash = da.repeat(cum_ic_flash[1:, :, :]-cum_ic_flash[:-1, :, :], nvel, axis=2)
    cg_flash = da.repeat(cum_cg_flash[1:, :, :]-cum_cg_flash[:-1, :, :], nvel, axis=2)
    e_lightning = ic_flash + cg_flash
    data_dict['IC_FLASHCOUNT'] = ic_flash[:, keep_indx, :]
    data_dict['CG_FLASHCOUNT'] = cg_flash[:, keep_indx, :]
    data_dict['total_lightning'] = e_lightning[:, keep_indx, :]
    e_no_lower = da.array(data['E_NO'], dtype='float32')[1:, :, :]
    e_no_upper = da.zeros((ntime, e_no_lower.shape[1], nvel - e_no_lower.shape[2]), dtype='float32')
    e_no = da.concatenate([e_no_lower, e_no_upper], axis=2)
    data_dict['E_NO'] = e_no[:, keep_indx, :]
    for i in range(0, 24):
        this_label = 'anthro_surr_emis_{:02d}'.format(i)
        surr_indx = surr_arr[:, i]
        data_dict[this_label] = e_no[:, surr_indx, :]
        this_label = 'lightning_surr_emis_{:02d}'.format(i)
        data_dict[this_label] = e_lightning[:, surr_indx, :]
    stg_u = da.array(data['U'], dtype='float32')
    stg_v = da.array(data['V'], dtype='float32')
    u_indx_left, u_indx_right, v_indx_bot, v_indx_up = find_indx_for_wind()
    wind_u = (stg_u[1:, u_indx_left, :] + stg_u[1:, u_indx_right, :])/2
    data_dict['U'] = wind_u[:, keep_indx, :]
    wind_v = (stg_v[1:, v_indx_up, :] + stg_v[1:, v_indx_bot, :])/2
    data_dict['V'] = wind_v[:, keep_indx, :]

    match_vars = ['no2', 'pres', 'temp', 'CLDFRA']
    #print('Variables read directly from wrf: {}'.format(match_vars[:]))
    for var in match_vars:
        data_dict[var] = da.array(data[var], dtype='float32')[1:, keep_indx, :]

    reduce_dim_vars = ['elev', 'W']
    #print('Variables average vertically: {}'.format(reduce_dim_vars[:]))
    for var in reduce_dim_vars:
        this_value = da.array(data[var], dtype='float32')[1:, keep_indx, :]
        data_dict[var] = (this_value[:, :, 1:] + this_value[:, :, :-1])/2

    add_dim_vars = ['COSZEN', 'PBLH', 'LAI', 'HGT', 'SWDOWN', 'GLW']
    #print('Variables add vertical layers: {}'.format(add_dim_vars[:]))

    for var in add_dim_vars:
        this_value = da.array(data[var], dtype='float32')[1:, keep_indx, :]
        data_dict[var] = da.repeat(this_value, nvel, axis=2)

    #print('Key of dict:{}'.format(data_dict.keys()))
    additional_features = ['xlon', 'xlat', 'date', 'elev', 'hour', 'IC_FLASHCOUNT', 'CG_FLASHCOUNT']
    y_label = ['no2']
    x_labels = [label for label in labels if label not in additional_features and label not in y_label]
    
    additional_arr = []
    x_arr = []
    y_arr = []
    for var in labels:
        #print('Reading this variable:{}'.format(var))
        this_value = data_dict[var].flatten()
        if var in additional_features:
            additional_arr.append(this_value)
        elif var in x_labels:
            x_arr.append(this_value.compute())
        elif var in y_label:
            y_arr.append(this_value.compute())
    return additional_arr, x_arr, y_arr, x_labels, additional_features
Code example #24
 def _get_test_calib_for_channel_vis(self, chroot, meas):
     data = super()._get_test_calib_for_channel_vis(chroot, meas)
     data["state/celestial/earth_sun_distance"] = xr.DataArray(da.repeat(da.array([30000000]), 6000))
     return data
Code example #25
def activations(images, labels, batch_size, model, layer_regex, nodaug_params, 
                daug_params, include_input=False, class_invariance=False, 
                n_daug_rep=0,  norms=['fro']):
    """
    Computes metrics from the activations, such as the norm of the feature
    maps, data augmentation invariance, class invariance, etc.

    Parameters
    ----------
    images : h5py Dataset
        The set of images

    labels : h5py Dataset
        The ground truth labels

    batch_size : int
        Batch size

    model : Keras Model
        The model

    nodaug_params : dict
        Dictionary of data augmentation parameters for the baseline

    daug_params : dict
        Dictionary of data augmentation parameters

    include_input : bool
        If True, the input layer is considered for the analysis

    class_invariance : bool
        If True, the class invariance score is computed

    n_daug_rep : int
        If larger than 0, the data augmentation invariance score is computed,
        performing n_daug_rep repetitions of random augmentations

    norms : list
        List of keywords to specify the types of norms to compute on the 
        activations

    Returns
    -------
    results_dict : dict
        Dictionary containing some performance metrics
    """
    def _update_stats(mean_norm, std_norm, norm):
        mean_norm_batch = np.mean(norm, axis=0)
        std_norm_batch = np.std(norm, axis=0)
        mean_norm = init / float(end) * mean_norm + \
                    batch_size / float(end) * mean_norm_batch
        std_norm = init / float(end) * std_norm ** 2 + \
                    batch_size / float(end) * std_norm_batch ** 2 + \
                    (init * batch_size) / float(end ** 2) * \
                    (mean_norm - mean_norm_batch) ** 2
        std_norm = np.sqrt(std_norm)

        return mean_norm, std_norm

    def _frobenius_norm(activations):
        norm = np.linalg.norm(
                activations, ord='fro', 
                axis=tuple(range(1, len(activations.shape) - 1)))
        return norm

    def _inf_norm(activations):
        norm = np.max(np.abs(activations),
                      axis=tuple(range(1, len(activations.shape) - 1)))
        return norm

    model = del_extra_nodes(model)

    n_images = images.shape[0]
    n_batches_per_epoch = int(np.ceil(float(n_images) / batch_size))

    # Get relevant layers
    if include_input:
        layer_regex = '({}|.*input.*)'.format(layer_regex)
    else:
        layer_regex = layer_regex

    layers = [layer.name for layer in model.layers 
              if re.compile(layer_regex).match(layer.name)]

    # Initialize HDF5 to store the activations
#     filename = 'hdf5_aux_{}'.format(time.time())
#     activations_hdf5_aux = h5py.File(filename, 'w')
#     hdf5_aux = [filename]
# 
#     grp_activations = activations_hdf5_aux.create_group('activations')

    if class_invariance:
#         grp_labels = activations_hdf5_aux.create_group('labels')
        labels_true_da = []
        labels_pred_da = []
        predictions_da = []
#         labels_true = grp_labels.create_dataset(
#                 'labels_true', shape=(n_images, ), dtype=np.uint8)
#         labels_pred = grp_labels.create_dataset(
#                 'labels_pred', shape=(n_images, ), dtype=np.uint8)
#         predictions = grp_labels.create_dataset(
#                 'predictions', shape=labels.shape, dtype=K.floatx())
        idx_softmax = model.output_names.index('softmax')
        store_labels = True
    else:
        store_labels = False

    # Initialize results dictionary
    results_dict = {'activations_norm': {}, 'summary': {}, 
                    'class_invariance': {}, 'daug_invariance': {}} 

    # Iterate over the layers
    for layer_name in layers:

        # Create batch generator
        image_gen = get_generator(images, **nodaug_params)
        batch_gen = generate_batches(image_gen, images, labels, batch_size,
                                     aug_per_im=1, shuffle=False)

        layer = model.get_layer(layer_name)
        layer_shape = layer.output_shape[1:]
        n_channels = layer_shape[-1]

        if re.compile('.*input.*').match(layer_name):
            layer_name = 'input'

        print('\nLayer {}\n'.format(layer_name))

        # Create a Dataset for the activations of the layer
#         activations_layer = grp_activations.create_dataset(
#                 layer_name, shape=(n_images, ) + layer_shape, 
#                 dtype=K.floatx())
        # Create dask array for the activations of the layer
        activations_layer_da = []

        # Initialize placeholders in the results dict for the layer
        results_dict['activations_norm'].update({layer_name: 
            {n: {'mean': np.zeros(n_channels), 
                 'std': np.zeros(n_channels)} for n in norms}})
        layer_dict = results_dict['activations_norm'][layer_name]

        activation_function = K.function([model.input, 
                                          K.learning_phase()], 
                                         [layer.output])

        # Iterate over the data set in batches
        init = 0
        for batch_images, batch_labels in tqdm(
                batch_gen, total=n_batches_per_epoch):

            batch_size = batch_images.shape[0]
            end = init + batch_size

            # Store labels
            if store_labels:
                preds = model.predict_on_batch(batch_images)
                if isinstance(preds, list):
                    preds = preds[idx_softmax]
                labels_pred_da.append(da.from_array(
                    np.argmax(preds, axis=1)))
                labels_true_da.append(da.from_array(
                    np.argmax(batch_labels, axis=1)))
                predictions_da.append(da.from_array(preds))
#                 labels_pred[init:end] = np.argmax(preds, axis=1)
#                 labels_true[init:end] = np.argmax(batch_labels, axis=1)
#                 predictions[init:end, :] = preds

            # Get and store activations
            activations = activation_function([batch_images, 0])[0]
            activations_layer_da.append(da.from_array(
                activations, chunks=activations.shape))
#             activations_layer[init:end] = activations

            # Compute norms
            for norm_key in norms:
                mean_norm = layer_dict[norm_key]['mean']
                std_norm = layer_dict[norm_key]['std']
                if norm_key == 'fro':
                    norm = _frobenius_norm(activations)
                elif norm_key == 'inf':
                    norm = _inf_norm(activations)
                else:
                    raise NotImplementedError('Implemented norms are fro '
                            'and inf')
                mean_norm, std_norm = _update_stats(mean_norm, std_norm, 
                                                    norm)
                layer_dict[norm_key]['mean'] = mean_norm
                layer_dict[norm_key]['std'] = std_norm

            init = end
            if init == n_images:
                store_labels = False
                break

        # Concatenate dask arrays
        activations_layer_da = da.concatenate(activations_layer_da, axis=0)
        activations_layer_da = activations_layer_da.reshape((n_images, -1))
        d_activations = activations_layer_da.shape[-1]

        if class_invariance:
            print('\nComputing class invariance\n')
            labels_pred_da = da.concatenate(labels_pred_da)
            labels_true_da = da.concatenate(labels_true_da)
            predictions_da = da.concatenate(predictions_da)
            n_classes = len(np.unique(labels_true_da))

        # Compute MSE matrix of the activations
        r = da.reshape(da.sum(da.square(activations_layer_da), 
                                        axis=1), (-1, 1))
        mse_matrix_da = (r - 2 * da.dot(activations_layer_da,
                                     da.transpose(activations_layer_da)) \
                     + da.transpose(r)) / d_activations
        mse_matrix_da = mse_matrix_da.rechunk((mse_matrix_da.chunksize[0],
                                               mse_matrix_da.shape[-1]))

        # Compute class invariance
        time0 = time()
        results_dict['class_invariance'].update({layer_name: {}})
        class_invariance_scores_da = []
        if class_invariance:
#             mse_matrix_mean = da.mean(mse_matrix_da).compute()
            for cl in tqdm(range(n_classes)):
                labels_cl = labels_pred_da == cl
                labels_cl = labels_cl.compute()
                mse_class = mse_matrix_da[labels_cl, :][:, labels_cl]
                mse_class = mse_class.rechunk((-1, -1))
#                 mse_class_mean = da.mean(mse_class).compute()
#                 class_invariance_score = 1. - np.divide(
#                         mse_class_mean, mse_matrix_mean)
#                 results_dict['class_invariance'][layer_name].update(
#                         {cl: class_invariance_score})
                class_invariance_scores_da.append(
                        1. - da.divide(da.mean(mse_class),
                                       da.mean(mse_matrix_da)))

        # Compute data augmentation invariance
        print('\nComputing data augmentation invariance\n')
        mse_daug_da = []

        results_dict['daug_invariance'].update({layer_name: {}})

        for r in range(n_daug_rep):
            print('Repetition {}'.format(r))

            image_gen_daug = get_generator(images, **daug_params)
            batch_gen_daug = generate_batches(image_gen_daug, images, labels, 
                                              batch_size, aug_per_im=1, 
                                              shuffle=False)

            activations_layer_daug_da = []

            # Iterate over the data set in batches to compute activations
            init = 0
            for batch_images, batch_labels in tqdm(
                    batch_gen, total=n_batches_per_epoch):

                batch_size = batch_images.shape[0]
                end = init + batch_size

                # Get and store activations
                activations = activation_function([batch_images, 0])[0]
                activations_layer_daug_da.append(da.from_array(
                    activations, chunks=activations.shape))

                init = end
                if init == n_images:
                    break

            activations_layer_daug_da = da.concatenate(
                    activations_layer_daug_da, axis=0)
            activations_layer_daug_da = activations_layer_daug_da.reshape(
                    (n_images, -1))
            activations_layer_daug_da = activations_layer_daug_da.rechunk(
                    (activations_layer_daug_da.chunksize[0],
                     activations_layer_daug_da.shape[-1]))

            # Compute MSE daug
            mse_daug_da.append(da.mean(da.square(activations_layer_da - \
                                                 activations_layer_daug_da), 
                                       axis=1))

        mse_daug_da = da.stack(mse_daug_da, axis=1)

        mse_sum = da.repeat(da.reshape(da.sum(mse_matrix_da, axis=1),
                                       (n_images, 1)), n_daug_rep, axis=1)

        daug_invariance_score_da = 1 - n_images * da.divide(mse_daug_da, mse_sum)

        time1 = time()

        # Compute dask results and update results dict
        results_dask = da.compute(class_invariance_scores_da,
                                  daug_invariance_score_da)

        time2 = time()

        results_dict['class_invariance'][layer_name].update(
                {cl: cl_inv_score 
                    for cl, cl_inv_score in enumerate(results_dask[0])})
        results_dict['daug_invariance'].update({layer_name: 
            {r: daug_inv_score 
                for r, daug_inv_score in enumerate(results_dask[1].T)}})
    # Compute summary statistics of the norms across the channels
    for layer, layer_dict in results_dict['activations_norm'].items():
        results_dict['summary'].update({layer: {}})
        for norm_key, norm_dict in layer_dict.items():
            results_dict['summary'][layer].update({norm_key: {
                'mean': np.mean(norm_dict['mean']), 
                'std': np.mean(norm_dict['std'])}})

    return results_dict
Code example #26
 def _expand_tiepoint_array_1km(self, arr, lines, cols):
     arr = da.repeat(arr, lines, axis=1)
     arr = da.concatenate((arr[:, :lines//2, :], arr, arr[:, -(lines//2):, :]), axis=1)
     arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)), cols, axis=1)
     return da.hstack((arr, arr[:, -cols:]))
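A toy illustration (made-up 2x3 array, not the satpy implementation) of the da.repeat step these tie-point expansions rely on: every coarse-grid column is duplicated along the column axis before the edge columns are padded with hstack.

import numpy as np
import dask.array as da

tiepoints = da.from_array(np.arange(6).reshape(2, 3), chunks=(2, 3))
expanded = da.repeat(tiepoints, 2, axis=1)  # duplicate every column twice
print(expanded.compute())
# [[0 0 1 1 2 2]
#  [3 3 4 4 5 5]]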
コード例 #27
0
ファイル: test_ninjotiff.py プロジェクト: pytroll/pyninjotiff
def test_write_bw_colormap():
    """Test saving a BW image with a colormap.

    Albedo with a colormap.

    Reflectances are 0, 29.76, 60, 90.24, 120.
    """
    area = STEREOGRAPHIC_AREA
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050),
                  ('polarization', None),
                  ('platform_name', 'NOAA-18'),
                  ('sensor', 'avhrr-3'),
                  ('units', '%'),
                  ('name', '1'),
                  ('level', None),
                  ('modifiers', ()),
                  ('wavelength', (0.5, 0.6, 0.7)),
                  ('calibration', 'reflectance'),
                  ('start_time', TIME - datetime.timedelta(minutes=75)),
                  ('end_time', TIME - datetime.timedelta(minutes=70)),
                  ('area', area),
                  ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    cm_vis = [0, 4095, 5887, 7167, 8191, 9215, 9983, 10751, 11519, 12287, 12799,
              13567, 14079, 14847, 15359, 15871, 16383, 16895, 17407, 17919, 18175,
              18687, 19199, 19711, 19967, 20479, 20735, 21247, 21503, 22015, 22271,
              22783, 23039, 23551, 23807, 24063, 24575, 24831, 25087, 25599, 25855,
              26111, 26367, 26879, 27135, 27391, 27647, 27903, 28415, 28671, 28927,
              29183, 29439, 29695, 29951, 30207, 30463, 30975, 31231, 31487, 31743,
              31999, 32255, 32511, 32767, 33023, 33279, 33535, 33791, 34047, 34303,
              34559, 34559, 34815, 35071, 35327, 35583, 35839, 36095, 36351, 36607,
              36863, 37119, 37119, 37375, 37631, 37887, 38143, 38399, 38655, 38655,
              38911, 39167, 39423, 39679, 39935, 39935, 40191, 40447, 40703, 40959,
              40959, 41215, 41471, 41727, 41983, 41983, 42239, 42495, 42751, 42751,
              43007, 43263, 43519, 43519, 43775, 44031, 44287, 44287, 44543, 44799,
              45055, 45055, 45311, 45567, 45823, 45823, 46079, 46335, 46335, 46591,
              46847, 46847, 47103, 47359, 47615, 47615, 47871, 48127, 48127, 48383,
              48639, 48639, 48895, 49151, 49151, 49407, 49663, 49663, 49919, 50175,
              50175, 50431, 50687, 50687, 50943, 50943, 51199, 51455, 51455, 51711,
              51967, 51967, 52223, 52223, 52479, 52735, 52735, 52991, 53247, 53247,
              53503, 53503, 53759, 54015, 54015, 54271, 54271, 54527, 54783, 54783,
              55039, 55039, 55295, 55551, 55551, 55807, 55807, 56063, 56319, 56319,
              56575, 56575, 56831, 56831, 57087, 57343, 57343, 57599, 57599, 57855,
              57855, 58111, 58367, 58367, 58623, 58623, 58879, 58879, 59135, 59135,
              59391, 59647, 59647, 59903, 59903, 60159, 60159, 60415, 60415, 60671,
              60671, 60927, 60927, 61183, 61439, 61439, 61695, 61695, 61951, 61951,
              62207, 62207, 62463, 62463, 62719, 62719, 62975, 62975, 63231, 63231,
              63487, 63487, 63743, 63743, 63999, 63999, 64255, 64255, 64511, 64511,
              64767, 64767, 65023, 65023, 65279]

    kwargs = {'ch_min_measurement_unit': np.array([0]),
              'ch_max_measurement_unit': np.array([120]),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 100015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': '%', 'nbits': 8, 'cmap': [cm_vis] * 3}

    data = da.tile(da.repeat(da.arange(5, chunks=1024) / 4.0, 205)[:-1],
                   1024).reshape((1, 1024, 1024))[:, :1024]
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=[
                        'bands', 'y', 'x'], attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        colormap, res = _load_file_values_with_colormap(filename)

        assert(len(colormap) == 768)
        assert(np.allclose(colormap[:256], cm_vis))
        assert(np.allclose(colormap[256:512], cm_vis))
        assert(np.allclose(colormap[512:], cm_vis))
        assert(np.allclose(res[0, ::205], np.array([1,  64, 128, 192, 255])))
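A smaller version (arbitrary sizes) of the da.tile(da.repeat(...)) idiom used above to build the step-wedge test image: a few grey levels are stretched into steps, trimmed by one sample, then tiled into rows.

import dask.array as da

row = da.repeat(da.arange(5, chunks=5) / 4.0, 3, axis=0)[:-1]  # five grey levels, 14 samples
data = da.tile(row, 2).reshape((1, 2, 14))
print(data.compute().shape)  # (1, 2, 14)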
コード例 #28
0
 def _expand_tiepoint_array_5km(self, arr, lines, cols):
     arr = da.repeat(arr, lines * 2, axis=1)
     arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)),
                     cols,
                     axis=1)
     return da.hstack((arr[:, :2], arr, arr[:, -2:]))
コード例 #29
0
    positiveInd(iLims2, L))
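# fStEn2bool below turns an (N, 2) array of [start, end) index pairs plus a total
# length into a boolean mask: samples inside each pair become True and the gaps
# between pairs become False (interpretation inferred from the expression itself).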
fStEn2bool = lambda iStEn, length: da.hstack(
    [da.ones(iEn2iSt, dtype=np.bool8) if b else da.zeros(iEn2iSt, dtype=np.bool8)
     for iEn2iSt, b in da.vstack((
         da.diff(da.hstack((0, iStEn.flat, length))),
         da.hstack((da.repeat([(False, True)], np.size(iStEn, 0), 0).flat,
                    False)))).T])
TimeShift_Log_sec = 60

kVabs = np.float64([[0.361570991503], [0]])
# @-<<Castom defenitions>>
# @+<<loading>>
# @+node:korzh.20180525121734.1: ** <<loading>>
# @+others
# @+node:korzh.20180526160931.1: *3* coef
"""
Load or set default
コード例 #30
0
ファイル: test_ninjotiff.py プロジェクト: vakkov/pyninjotiff
def test_write_bw_fill():
    """Test saving a BW image with transparency."""
    from pyninjotiff.ninjotiff import save
    from pyninjotiff.tifffile import TiffFile

    area = FakeArea(
        {
            'ellps': 'WGS84',
            'lat_0': 90.0,
            'lat_ts': 60.0,
            'lon_0': 0.0,
            'proj': 'stere'
        }, (-1000000.0, -4500000.0, 2072000.0, -1428000.0), 1024, 1024)
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', '%'), ('name', '1'), ('level', None),
                  ('modifiers', ()), ('wavelength', (0.5, 0.6, 0.7)),
                  ('calibration', 'reflectance'),
                  ('start_time', TIME - datetime.timedelta(minutes=25)),
                  ('end_time', TIME - datetime.timedelta(minutes=20)),
                  ('area', area), ('ancillary_variables', []),
                  ('enhancement_history', [{
                      'offset': offset,
                      'scale': scale
                  }])])

    kwargs = {
        'ch_min_measurement_unit': np.array([0]),
        'ch_max_measurement_unit': np.array([120]),
        'compute': True,
        'fill_value': None,
        'sat_id': 6300014,
        'chan_id': 100015,
        'data_cat': 'P**N',
        'data_source': 'SMHI',
        'physic_unit': '%',
        'nbits': 8
    }

    data1 = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                    256).reshape((1, 256, 1024))
    datanan = da.ones((1, 256, 1024), chunks=1024) * np.nan
    data2 = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                    512).reshape((1, 512, 1024))
    data = da.concatenate((data1, datanan, data2), axis=1)
    data = xr.DataArray(data,
                        coords={'bands': ['L']},
                        dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value
        for i in range(3):
            assert (np.all(
                np.array(colormap[i * 256:(i + 1) * 256]) == np.arange(256) *
                256))
        assert (np.all(res[0, ::256] == np.array([1, 86, 170, 255])))
        assert (np.all(res[256, :] == 0))
コード例 #31
0
ファイル: test_ninjotiff.py プロジェクト: vakkov/pyninjotiff
def test_write_ir_colormap():
    """Test saving a IR image with a colormap.

    IR with a colormap.

    Temperatures are -70, -40.24, -10, 20.24, 50.
    """
    from pyninjotiff.ninjotiff import save
    from pyninjotiff.tifffile import TiffFile

    area = FakeArea(
        {
            'ellps': 'WGS84',
            'lat_0': 90.0,
            'lat_ts': 60.0,
            'lon_0': 0.0,
            'proj': 'stere'
        }, (-1000000.0, -4500000.0, 2072000.0, -1428000.0), 1024, 1024)
    scale = 1.0 / 120
    offset = 70.0 / 120
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', 'K'), ('name', '4'), ('level', None),
                  ('modifiers', ()), ('wavelength', (10.3, 10.8, 11.3)),
                  ('calibration', 'brightness_temperature'),
                  ('start_time', TIME - datetime.timedelta(minutes=85)),
                  ('end_time', TIME - datetime.timedelta(minutes=80)),
                  ('area', area), ('ancillary_variables', []),
                  ('enhancement_history', [{
                      'offset': offset,
                      'scale': scale
                  }])])

    ir_map = [
        255, 1535, 2559, 3327, 4095, 4863, 5375, 5887, 6399, 6911, 7423, 7935,
        8447, 8959, 9471, 9983, 10239, 10751, 11263, 11519, 12031, 12287,
        12799, 13055, 13567, 13823, 14335, 14591, 14847, 15359, 15615, 16127,
        16383, 16639, 17151, 17407, 17663, 17919, 18431, 18687, 18943, 19199,
        19711, 19967, 20223, 20479, 20735, 21247, 21503, 21759, 22015, 22271,
        22527, 22783, 23295, 23551, 23807, 24063, 24319, 24575, 24831, 25087,
        25343, 25599, 25855, 26367, 26623, 26879, 27135, 27391, 27647, 27903,
        28159, 28415, 28671, 28927, 29183, 29439, 29695, 29951, 30207, 30463,
        30719, 30975, 31231, 31487, 31743, 31999, 31999, 32255, 32511, 32767,
        33023, 33279, 33535, 33791, 34047, 34303, 34559, 34815, 35071, 35327,
        35327, 35583, 35839, 36095, 36351, 36607, 36863, 37119, 37375, 37375,
        37631, 37887, 38143, 38399, 38655, 38911, 39167, 39167, 39423, 39679,
        39935, 40191, 40447, 40703, 40703, 40959, 41215, 41471, 41727, 41983,
        41983, 42239, 42495, 42751, 43007, 43263, 43263, 43519, 43775, 44031,
        44287, 44287, 44543, 44799, 45055, 45311, 45311, 45567, 45823, 46079,
        46335, 46335, 46591, 46847, 47103, 47359, 47359, 47615, 47871, 48127,
        48127, 48383, 48639, 48895, 49151, 49151, 49407, 49663, 49919, 49919,
        50175, 50431, 50687, 50687, 50943, 51199, 51455, 51455, 51711, 51967,
        52223, 52223, 52479, 52735, 52991, 52991, 53247, 53503, 53759, 53759,
        54015, 54271, 54527, 54527, 54783, 55039, 55039, 55295, 55551, 55807,
        55807, 56063, 56319, 56319, 56575, 56831, 57087, 57087, 57343, 57599,
        57599, 57855, 58111, 58367, 58367, 58623, 58879, 58879, 59135, 59391,
        59391, 59647, 59903, 60159, 60159, 60415, 60671, 60671, 60927, 61183,
        61183, 61439, 61695, 61695, 61951, 62207, 62463, 62463, 62719, 62975,
        62975, 63231, 63487, 63487, 63743, 63999, 63999, 64255, 64511, 64511,
        64767, 65023, 65023, 65279
    ]

    kwargs = {
        'ch_min_measurement_unit': np.array([-70]),
        'ch_max_measurement_unit': np.array([50]),
        'compute': True,
        'fill_value': None,
        'sat_id': 6300014,
        'chan_id': 900015,
        'data_cat': 'P**N',
        'data_source': 'SMHI',
        'physic_unit': 'C',
        'nbits': 8,
        'cmap': [ir_map] * 3
    }

    data = da.tile(da.repeat(da.arange(5, chunks=1024) / 4.0, 205)[:-1],
                   1024).reshape((1, 1024, 1024))[:, :1024]
    data = xr.DataArray(data,
                        coords={'bands': ['L']},
                        dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value

        assert (len(colormap) == 768)
        assert (np.allclose(colormap[:256], ir_map))
        assert (np.allclose(colormap[256:512], ir_map))
        assert (np.allclose(colormap[512:], ir_map))
        assert (np.allclose(res[0, ::205], np.array([1, 64, 128, 192, 255])))