def _expand_tiepoint_array_5km(self, arr, lines, cols):
    arr = da.repeat(arr, lines * 2, axis=1)
    arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)), cols, axis=1)
    if self.cscan_full_width == 271:
        return da.hstack((arr[:, :2], arr, arr[:, -2:]))
    else:
        return da.hstack((arr[:, :2], arr, arr[:, -5:], arr[:, -2:]))
def _expand_tiepoint_array_1km(self, arr, lines, cols):
    arr = da.repeat(arr, lines, axis=1)
    arr = da.concatenate(
        (arr[:, :lines // 2, :], arr, arr[:, -(lines // 2):, :]), axis=1)
    arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)), cols, axis=1)
    return da.hstack((arr, arr[:, -cols:]))
def _expand_tiepoint_array_5km(self, arr, lines, cols):
    arr = da.repeat(arr, lines * 2, axis=1)
    arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)), cols, axis=1)
    factor = self.fscan_width // self.cscan_width
    if self.cscan_full_width == 271:
        return da.hstack((arr[:, :2 * factor], arr, arr[:, -2 * factor:]))
    else:
        return da.hstack((arr[:, :2 * factor], arr, arr[:, -self.fscan_width:],
                          arr[:, -2 * factor:]))
def test_repeat():
    x = np.random.random((10, 11, 13))
    d = da.from_array(x, chunks=(4, 5, 3))

    repeats = [1, 2, 5]
    axes = [-3, -2, -1, 0, 1, 2]

    for r in repeats:
        for a in axes:
            assert_eq(x.repeat(r, axis=a), d.repeat(r, axis=a))

    assert_eq(d.repeat(2, 0), da.repeat(d, 2, 0))

    with pytest.raises(NotImplementedError):
        da.repeat(d, np.arange(10))

    with pytest.raises(NotImplementedError):
        da.repeat(d, 2, None)

    with pytest.raises(NotImplementedError):
        da.repeat(d, 2)

    for invalid_axis in [3, -4]:
        with pytest.raises(ValueError):
            da.repeat(d, 2, axis=invalid_axis)

    x = np.arange(5)
    d = da.arange(5, chunks=(2,))

    assert_eq(x.repeat(3), d.repeat(3))

    for r in [1, 2, 3, 4]:
        assert all(concat(d.repeat(r).chunks))
def test_repeat():
    x = np.random.random((10, 11, 13))
    d = da.from_array(x, chunks=(4, 5, 3))

    repeats = [0, 1, 2, 5]
    axes = [-3, -2, -1, 0, 1, 2]

    for r in repeats:
        for a in axes:
            assert_eq(x.repeat(r, axis=a), d.repeat(r, axis=a))

    assert_eq(d.repeat(2, 0), da.repeat(d, 2, 0))

    with pytest.raises(NotImplementedError):
        da.repeat(d, np.arange(10))

    with pytest.raises(NotImplementedError):
        da.repeat(d, 2, None)

    with pytest.raises(NotImplementedError):
        da.repeat(d, 2)

    for invalid_axis in [3, -4]:
        with pytest.raises(ValueError):
            da.repeat(d, 2, axis=invalid_axis)

    x = np.arange(5)
    d = da.arange(5, chunks=(2, ))

    assert_eq(x.repeat(3), d.repeat(3))

    for r in [1, 2, 3, 4]:
        assert all(concat(d.repeat(r).chunks))
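# A minimal standalone sketch (not part of the test suites above) of the behaviour the
# two test_repeat variants exercise: with an integer count and an explicit axis,
# da.repeat matches np.repeat, while array-valued repeats and axis=None on a
# multi-dimensional array are not implemented in dask. Names below are illustrative only.
import numpy as np
import dask.array as da

x = np.arange(6).reshape(2, 3)
d = da.from_array(x, chunks=(1, 2))

assert np.array_equal(da.repeat(d, 2, axis=0).compute(), np.repeat(x, 2, axis=0))
assert np.array_equal(da.repeat(d, 3, axis=1).compute(), np.repeat(x, 3, axis=1))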
def _expand_tiepoint_array_5km(self, arr, lines, cols):
    if self.level == 2:
        # Repeat the last column to complete L2 data
        arr = da.dstack([arr, arr[:, :, -1]])
    arr = da.repeat(arr, lines * 2, axis=1)
    if self.level == 1:
        arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)), cols, axis=1)
    elif self.level == 2:
        arr = da.repeat(arr.reshape((-1, self.cscan_full_width)), cols, axis=1)
    return da.hstack((arr[:, :2], arr, arr[:, -2:]))
def test_write_bw_inverted_ir_fill():
    """Test saving a BW image with transparency."""
    area = STEREOGRAPHIC_AREA
    scale = 1.0 / 120
    offset = 70.0 / 120
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', 'K'), ('name', '4'), ('level', None),
                  ('modifiers', ()), ('wavelength', (10.3, 10.8, 11.3)),
                  ('calibration', 'brightness_temperature'),
                  ('start_time', TIME - datetime.timedelta(minutes=35)),
                  ('end_time', TIME - datetime.timedelta(minutes=30)),
                  ('area', area), ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    kwargs = {'ch_min_measurement_unit': np.array([-70]),
              'ch_max_measurement_unit': np.array([50]),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 900015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': 'C', 'nbits': 8}

    data1 = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                    256).reshape((1, 256, 1024))
    datanan = da.ones((1, 256, 1024), chunks=1024) * np.nan
    data2 = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                    512).reshape((1, 512, 1024))
    data = da.concatenate((data1, datanan, data2), axis=1)
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value
        for i in range(3):
            assert(np.all(np.array(colormap[i * 256:(i + 1) * 256]) ==
                          np.arange(255, -1, -1) * 256))
        assert(np.all(res[0, ::256] == np.array([1, 86, 170, 255])))
        assert(np.all(res[256, :] == 0))
def _get_test_calib_for_channel_vis(self, chroot, meas):
    xrda = xr.DataArray
    data = {}
    data["state/celestial/earth_sun_distance"] = xrda(
        da.repeat(da.array([149597870.7]), 6000))
    data[meas + "/channel_effective_solar_irradiance"] = xrda(50)
    return data
def expand_reduce(cls, d_arr, repeats):
    if not isinstance(d_arr, da.Array):
        d_arr = da.from_array(d_arr, chunks=CHUNK_SIZE)
    if all(x == 1 for x in repeats.values()):
        return d_arr
    elif all(x >= 1 for x in repeats.values()):
        # rechunk so new chunks are the same size as old chunks
        c_size = max(x[0] for x in d_arr.chunks)

        def _calc_chunks(c, c_size):
            whole_chunks = [c_size] * int(sum(c) // c_size)
            remaining = sum(c) - sum(whole_chunks)
            if remaining:
                whole_chunks += [remaining]
            return tuple(whole_chunks)

        new_chunks = [_calc_chunks(x, int(c_size // repeats[axis]))
                      for axis, x in enumerate(d_arr.chunks)]
        d_arr = d_arr.rechunk(new_chunks)

        for axis, factor in repeats.items():
            if not factor.is_integer():
                raise ValueError("Expand factor must be a whole number")
            d_arr = da.repeat(d_arr, int(factor), axis=axis)
        return d_arr
    elif all(x <= 1 for x in repeats.values()):
        # reduce
        y_size = 1. / repeats[0]
        x_size = 1. / repeats[1]
        return cls.aggregate(d_arr, y_size, x_size)
    else:
        raise ValueError("Must either expand or reduce in both "
                         "directions")
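# A minimal sketch of the expansion branch in expand_reduce above, assuming whole-number
# factors: each axis is enlarged by repeating every element `factor` times, i.e. a
# nearest-neighbour upsample. The names `small` and `factors` are illustrative only.
import dask.array as da

small = da.arange(6, chunks=3).reshape(2, 3)
factors = {0: 2, 1: 3}
expanded = small
for axis, factor in factors.items():
    expanded = da.repeat(expanded, factor, axis=axis)
assert expanded.shape == (4, 9)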
def test_write_rgb_classified():
    """Test saving a transparent RGB."""
    area = STEREOGRAPHIC_AREA
    x_size, y_size = 1024, 1024
    arr = np.zeros((3, y_size, x_size))

    attrs = dict([('platform_name', 'NOAA-18'), ('resolution', 1050),
                  ('polarization', None),
                  ('start_time', TIME - datetime.timedelta(minutes=65)),
                  ('end_time', TIME - datetime.timedelta(minutes=60)),
                  ('level', None), ('sensor', 'avhrr-3'),
                  ('ancillary_variables', []), ('area', area),
                  ('wavelength', None), ('optional_datasets', []),
                  ('standard_name', 'overview'), ('name', 'overview'),
                  ('prerequisites', [0.6, 0.8, 10.8]),
                  ('optional_prerequisites', []), ('calibration', None),
                  ('modifiers', None), ('mode', 'P')])

    kwargs = {'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 1700015, 'data_cat': 'PPRN', 'data_source': 'SMHI',
              'nbits': 8}

    data1 = da.tile(da.repeat(da.arange(4, chunks=1024), 256),
                    256).reshape((1, 256, 1024))
    datanan = da.ones((1, 256, 1024), chunks=1024) * 4
    data2 = da.tile(da.repeat(da.arange(4, chunks=1024), 256),
                    512).reshape((1, 512, 1024))
    data = da.concatenate((data1, datanan, data2), axis=1)
    data = xr.DataArray(data, coords={'bands': ['P']}, dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = XRImage(data)

    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        res = tif[0].asarray()
        for idx in range(3):
            np.testing.assert_allclose(
                res[:, :, idx],
                np.round(np.nan_to_num(arr[idx, :, :]) * 255).astype(np.uint8))
        np.testing.assert_allclose(res[:, :, 3] == 0, np.isnan(arr[0, :, :]))
def repeat_block(image, block_shape):
    """da.repeat for n-dim."""
    rep = image.copy()
    for ax in range(image.ndim):
        rep = da.repeat(rep, repeats=block_shape[ax], axis=ax)
    return rep
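# Hypothetical usage of the repeat_block helper above: upsampling a 2-D chunked array
# by a per-axis block shape. The array contents and block shape are illustrative only,
# and this assumes the repeat_block definition above plus dask installed.
import dask.array as da

image = da.ones((3, 4), chunks=(2, 2))
upsampled = repeat_block(image, block_shape=(2, 3))
assert upsampled.shape == (6, 12)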
def missing_spectrum(  # pylint: disable=too-many-locals
        df: DataArray, bins: int) -> Dict[str, da.Array]:
    """Calculate a missing spectrum for each column."""
    nrows, ncols = df.shape
    data = df.nulls

    if nrows > 1:
        num_bins = min(bins, nrows - 1)
        bin_size = nrows // num_bins
        chunk_size = min(1024 * 1024 * 128, nrows * ncols)  # max 1024 x 1024 x 128 Bytes bool values
        nbins_per_chunk = max(chunk_size // (bin_size * data.shape[1]), 1)
        chunk_size = nbins_per_chunk * bin_size
        data = data.rechunk((chunk_size, None))
        sep = nrows // chunk_size * chunk_size
    else:
        # avoid division or modulo by zero
        bin_size = 1
        nbins_per_chunk = 1
        chunk_size = 1
        data = data.rechunk((chunk_size, None))
        sep = 1

    spectrum_missing_percs = data[:sep].map_blocks(
        missing_perc_blockwise(bin_size),
        chunks=(nbins_per_chunk, *data.chunksize[1:]),
        dtype=float,
    )

    # calculation for the last chunk
    if sep != nrows:
        spectrum_missing_percs_remain = data[sep:].map_blocks(
            missing_perc_blockwise(bin_size),
            chunks=(int(np.ceil((nrows - sep) / bin_size)), *data.shape[1:]),
            dtype=float,
        )
        spectrum_missing_percs = da.concatenate(
            [spectrum_missing_percs, spectrum_missing_percs_remain], axis=0)

    num_bins = spectrum_missing_percs.shape[0]

    locs0 = da.arange(num_bins) * bin_size
    locs1 = da.minimum(locs0 + bin_size, nrows)
    locs_middle = locs0 + bin_size / 2

    return {
        "column": da.repeat(da.from_array(df.columns.values, (1,)), num_bins),
        "location": da.tile(locs_middle, ncols),
        "missing_rate": spectrum_missing_percs.T.ravel().rechunk(locs_middle.shape[0]),
        "loc_start": da.tile(locs0, ncols),
        "loc_end": da.tile(locs1, ncols),
    }
def const_features_for_single_grid_single_file(grid_indx, wind_grid_indx, data):
    client = Client()
    dims = data['no2'].shape
    ntime = dims[0] - 1
    nvel = dims[2]
    data_dict = dict()

    data_hours = da.array(data['hour'][1:])
    data_dict['hour'] = da.repeat(data_hours[:, :], nvel, axis=1)

    data_dict['date'] = da.zeros((ntime, nvel)) + da.mean(data['date'][:])
    data_dict['date'] = data_dict['date']

    cum_ic_flash = da.array(data['IC_FLASHCOUNT'][:, grid_indx, :])
    cum_cg_flash = da.array(data['CG_FLASHCOUNT'][:, grid_indx, :])
    data_dict['IC_FLASHCOUNT'] = da.repeat(cum_ic_flash[1:, :] - cum_ic_flash[:-1, :], nvel, axis=1)
    data_dict['CG_FLASHCOUNT'] = da.repeat(cum_cg_flash[1:, :] - cum_cg_flash[:-1, :], nvel, axis=1)

    e_no_lower = da.array(data['E_NO'])[1:, grid_indx, :]
    e_no_upper = da.zeros((ntime, nvel - e_no_lower.shape[1]))
    data_dict['E_NO'] = da.concatenate([e_no_lower, e_no_upper], axis=1)

    data_dict['U'] = (data['U'][1:, wind_grid_indx[0][0], :] +
                      data['U'][1:, wind_grid_indx[0][1], :]) / 2
    data_dict['V'] = (data['V'][1:, wind_grid_indx[1][0], :] +
                      data['V'][1:, wind_grid_indx[1][1], :]) / 2

    match_vars = ['no2', 'pres', 'temp', 'CLDFRA']
    print('Variables read directly from wrf: {}'.format(match_vars[:]))
    for var in match_vars:
        data_dict[var] = da.array(data[var])[1:, grid_indx, :]

    reduce_dim_vars = ['elev', 'W']
    print('Variables average vertically: {}'.format(reduce_dim_vars[:]))
    for var in reduce_dim_vars:
        this_value = da.array(data[var])[1:, grid_indx, :]
        data_dict[var] = (this_value[:, 1:] + this_value[:, :-1]) / 2

    add_dim_vars = ['COSZEN', 'PBLH', 'LAI', 'HGT', 'SWDOWN', 'GLW']
    print('Variables add vertical layers: {}'.format(add_dim_vars[:]))
    for var in add_dim_vars:
        this_value = da.array(data[var])[1:, grid_indx, :]
        data_dict[var] = da.repeat(this_value, nvel, axis=1)

    print('Key of dict:{}'.format(data_dict.keys()))
    save_arr = []
    for var in data_dict.keys():
        data_dict[var] = data_dict[var].flatten()
        save_arr.append(data_dict[var])
    save_arr = da.array(save_arr).compute()
    return save_arr
def test_write_bw():
    """Test saving a BW image."""
    from pyninjotiff.ninjotiff import save
    from pyninjotiff.tifffile import TiffFile

    area = FakeArea({'ellps': 'WGS84', 'lat_0': '90.0', 'lat_ts': '60.0',
                     'lon_0': '0.0', 'proj': 'stere'},
                    (-1000000.0, -4500000.0, 2072000.0, -1428000.0),
                    1024, 1024)
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', '%'), ('name', '1'), ('level', None),
                  ('modifiers', ()), ('wavelength', (10.3, 10.8, 11.3)),
                  ('calibration', 'brightness_temperature'),
                  ('start_time', TIME - datetime.timedelta(minutes=5)),
                  ('end_time', TIME), ('area', area),
                  ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    kwargs = {'ch_min_measurement_unit': np.array([0]),
              'ch_max_measurement_unit': np.array([120]),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 100015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': '%', 'nbits': 8}

    data = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                   1024).reshape((1, 1024, 1024))
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        res = tif[0].asarray()
        assert (np.allclose(res[0, 0, ::256],
                            np.array([256, 22016, 43520, 65280])))
def missing_spectrum(  # pylint: disable=too-many-locals
        data: da.Array, cols: np.ndarray, bins: int) -> dd.DataFrame:
    """Calculate a missing spectrum for each column."""
    nrows, ncols = data.shape
    num_bins = min(bins, nrows - 1)
    bin_size = nrows // num_bins
    chunk_size = min(1024 * 1024 * 128, nrows * ncols)  # max 1024 x 1024 x 128 Bytes bool values
    nbins_per_chunk = max(chunk_size // (bin_size * data.shape[1]), 1)

    chunk_size = nbins_per_chunk * bin_size

    data = data.rechunk((chunk_size, None))
    sep = nrows // chunk_size * chunk_size

    spectrum_missing_percs = data[:sep].map_blocks(
        missing_perc_blockwise(bin_size),
        chunks=(nbins_per_chunk, *data.shape[1:]),
        dtype=float,
    )

    # calculation for the last chunk
    if sep != nrows:
        spectrum_missing_percs_remain = data[sep:].map_blocks(
            missing_perc_blockwise(bin_size),
            chunks=(int(np.ceil((nrows - sep) / bin_size)), *data.shape[1:]),
            dtype=float,
        )
        spectrum_missing_percs = da.concatenate(
            [spectrum_missing_percs, spectrum_missing_percs_remain], axis=0)

    num_bins = spectrum_missing_percs.shape[0]

    locs0 = da.arange(num_bins) * bin_size
    locs1 = da.minimum(locs0 + bin_size, nrows)
    locs_middle = locs0 + bin_size / 2

    df = dd.from_dask_array(
        da.repeat(da.from_array(cols, (1,)), num_bins), columns=["column"],
    )
    df = df.assign(
        location=da.tile(locs_middle, ncols),
        missing_rate=spectrum_missing_percs.T.ravel().rechunk(locs_middle.shape[0]),
        loc_start=da.tile(locs0, ncols),
        loc_end=da.tile(locs1, ncols),
    )
    return df
def scale_swath_data(self, data, scaling_factors):
    """Scale swath data using scaling factors and offsets.

    Multi-granule (a.k.a. aggregated) files will have more than the usual two values.
    """
    num_grans = len(scaling_factors) // 2
    gran_size = data.shape[0] // num_grans
    factors = scaling_factors.where(scaling_factors > -999)
    factors = factors.data.reshape((-1, 2))
    factors = xr.DataArray(da.repeat(factors, gran_size, axis=0),
                           dims=(data.dims[0], 'factors'))
    data = data * factors[:, 0] + factors[:, 1]
    return data
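# A minimal numeric sketch of the factor broadcasting used in scale_swath_data above:
# one (scale, offset) pair per granule is repeated so that every swath line within a
# granule sees its own pair. The sizes and values below are illustrative only.
import numpy as np
import dask.array as da

factors = da.from_array(np.array([[2.0, 1.0], [3.0, -1.0]]), chunks=2)  # two granules
gran_size = 4                                                           # lines per granule
per_line = da.repeat(factors, gran_size, axis=0)                        # one row per swath line
assert per_line.shape == (8, 2)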
def test_write_p():
    """Test saving an image in P mode.

    Values are 0, 1, 2, 3, 4. Palette is black, red, green, blue, gray.
    """
    area = STEREOGRAPHIC_AREA

    palette = [np.array((0, 0, 0, 1)),
               np.array((1, 0, 0, 1)),
               np.array((0, 1, 0, 1)),
               np.array((0, 0, 1, 1)),
               np.array((.5, .5, .5, 1))]
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'MSG'), ('sensor', 'seviri'),
                  ("palette", palette), ('name', 'msg_cloudtop_height'),
                  ('level', None), ('modifiers', ()),
                  ('start_time', TIME - datetime.timedelta(minutes=85)),
                  ('end_time', TIME - datetime.timedelta(minutes=80)),
                  ('area', area), ('ancillary_variables', [])])

    data = da.tile(da.repeat(da.arange(5, chunks=1024, dtype=np.uint8), 205)[:-1],
                   1024).reshape((1, 1024, 1024))[:, :1024]
    data = xr.DataArray(data, coords={'bands': ['P']}, dims=['bands', 'y', 'x'],
                        attrs=attrs)

    kwargs = {'compute': True, 'fill_value': None, 'sat_id': 9000014,
              'chan_id': 1900015, 'data_cat': 'GPRN', 'data_source': 'SMHI',
              'physic_unit': 'NONE', "physic_value": "NONE",
              "description": "NWCSAF Cloud Top Height"}

    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        colormap, res = _load_file_values_with_colormap(filename)

        np.testing.assert_array_equal(res[0, ::205], [0, 1, 2, 3, 4])
        assert(len(colormap) == 768)
        for i, line in enumerate(palette):
            np.testing.assert_array_equal(colormap[i::256],
                                          (line[:3] * 255).astype(int))
def test_write_bw():
    """Test saving a BW image.

    Reflectances.
    """
    area = STEREOGRAPHIC_AREA
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', '%'), ('name', '1'), ('level', None),
                  ('modifiers', ()), ('wavelength', (0.5, 0.6, 0.7)),
                  ('calibration', 'reflectance'),
                  ('start_time', TIME - datetime.timedelta(minutes=5)),
                  ('end_time', TIME), ('area', area),
                  ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    kwargs = {'ch_min_measurement_unit': xr.DataArray(0),
              'ch_max_measurement_unit': xr.DataArray(120),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 100015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': '%', 'nbits': 8}

    data = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                   1024).reshape((1, 1024, 1024))
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value
        for i in range(3):
            assert(np.all(np.array(colormap[i * 256:(i + 1) * 256]) ==
                          np.arange(256) * 256))
        assert(np.all(res[0, ::256] == np.array([1, 86, 170, 255])))
def missing_spectrum(df: dd.DataFrame, bins: int, ncols: int) -> Tuple[dd.DataFrame, dd.DataFrame]:
    """Calculate a missing spectrum for each column."""
    # pylint: disable=too-many-locals
    num_bins = min(bins, len(df) - 1)

    df = df.iloc[:, :ncols]
    cols = df.columns[:ncols]
    ncols = len(cols)
    nrows = len(df)
    chunk_size = len(df) // num_bins
    data = df.isnull().to_dask_array()
    data.compute_chunk_sizes()
    data = data.rechunk((chunk_size, None))

    notnull_counts = data.sum(axis=0) / data.shape[0]
    total_missing_percs = {col: notnull_counts[idx] for idx, col in enumerate(cols)}

    spectrum_missing_percs = data.map_blocks(
        missing_perc_blockwise, chunks=(1, data.shape[1]), dtype=float)
    nsegments = len(spectrum_missing_percs)

    locs0 = da.arange(nsegments) * chunk_size
    locs1 = da.minimum(locs0 + chunk_size, nrows)
    locs_middle = locs0 + chunk_size / 2

    df = dd.from_dask_array(
        da.repeat(da.from_array(cols.values, (1,)), nsegments), columns=["column"],
    )
    df = df.assign(
        location=da.tile(locs_middle, ncols),
        missing_rate=spectrum_missing_percs.T.ravel(),
        loc_start=da.tile(locs0, ncols),
        loc_end=da.tile(locs1, ncols),
    )

    return df, total_missing_percs
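# A small sketch of the long-format pairing used by the missing_spectrum variants above:
# repeating the column names once per bin and tiling the bin locations once per column
# lines the two vectors up element-wise. Names and sizes are illustrative only.
import numpy as np
import dask.array as da

cols = np.array(["a", "b"])
locs = da.arange(3)
column = da.repeat(da.from_array(cols, chunks=1), 3, axis=0)  # a a a b b b
location = da.tile(locs, 2)                                   # 0 1 2 0 1 2
assert column.shape == location.shape == (6,)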
def main(argv=None):
    # cluster = LocalCluster(dashboard_address=None)
    # client = Client(cluster, memory_limit='{}GB'.format(FLAGS.memory_limit),
    #                 processes=False)

    K.set_floatx('float32')

    chunk_size = FLAGS.chunk_size

    # Read data set
    hdf5_file = h5py.File(FLAGS.data_file, 'r')
    images, labels, _ = hdf52dask(hdf5_file, FLAGS.group, chunk_size,
                                  shuffle=FLAGS.shuffle, seed=FLAGS.seed,
                                  pct=FLAGS.pct)
    n_images = images.shape[0]
    n_batches = int(np.ceil(n_images / float(FLAGS.batch_size)))

    # Data augmentation parameters
    daug_params_file = get_daug_scheme_path(FLAGS.daug_params, FLAGS.data_file)
    daug_params = yaml.load(open(daug_params_file, 'r'), Loader=yaml.FullLoader)
    nodaug_params_file = get_daug_scheme_path('nodaug.yml', FLAGS.data_file)
    nodaug_params = yaml.load(open(nodaug_params_file, 'r'), Loader=yaml.FullLoader)

    # Initialize the network model
    model_filename = FLAGS.model
    model = load_model(model_filename)

    # Print the model summary
    model.summary()

    # Get relevant layers
    if FLAGS.store_input:
        layer_regex = '({}|.*input.*)'.format(FLAGS.layer_regex)
    else:
        layer_regex = FLAGS.layer_regex

    layers = [layer.name for layer in model.layers
              if re.compile(layer_regex).match(layer.name)]

    # Create batch generators
    n_daug_rep = FLAGS.n_daug_rep
    n_diff_per_batch = int(FLAGS.batch_size / n_daug_rep)
    image_gen_daug = get_generator(images, **daug_params)
    batch_gen_daug = batch_generator(image_gen_daug, images, labels,
                                     batch_size=n_diff_per_batch,
                                     aug_per_im=n_daug_rep, shuffle=False)
    image_gen_nodaug = get_generator(images, **nodaug_params)
    batch_gen_nodaug = batch_generator(image_gen_nodaug, images, labels,
                                       FLAGS.batch_size, aug_per_im=1,
                                       shuffle=False)

    # Outputs
    if FLAGS.output_dir == '-1':
        FLAGS.output_dir = os.path.dirname(FLAGS.model)

    output_hdf5 = h5py.File(
        os.path.join(FLAGS.output_dir, FLAGS.output_mse_matrix_hdf5), 'w')
    output_pickle = os.path.join(FLAGS.output_dir, FLAGS.output_pickle)
    df_init_idx = 0
    df = pd.DataFrame()

    # Iterate over the layers
    for layer_idx, layer_name in enumerate(layers):

        # Reload the model
        if layer_idx > 0:
            K.clear_session()
            model = load_model(model_filename)

        layer = model.get_layer(layer_name)

        # Rename input layer
        if re.compile('.*input.*').match(layer_name):
            layer_name = 'input'

        hdf5_layer = output_hdf5.create_group(layer_name)

        activation_function = K.function([model.input, K.learning_phase()],
                                         [layer.output])

        print('\nComputing pairwise similarity at layer {}'.format(layer_name))

        # Compute activations of original data (without augmentation)
        a_nodaug_da = get_activations(activation_function, batch_gen_nodaug)
        a_nodaug_da = da.squeeze(a_nodaug_da)
        a_nodaug_da = da.rechunk(a_nodaug_da,
                                 (chunk_size, ) + (a_nodaug_da.shape[1:]))
        dim_activations = a_nodaug_da.shape[1]

        # Compute matrix of similarities
        r = da.reshape(da.sum(da.square(a_nodaug_da), axis=1), (-1, 1))
        mse_matrix = (r - 2 * da.dot(a_nodaug_da, da.transpose(a_nodaug_da))
                      + da.transpose(r)) / dim_activations

        # Compute activations with augmentation
        a_daug_da = get_activations(activation_function, batch_gen_daug)
        a_daug_da = da.rechunk(a_daug_da, (chunk_size, dim_activations, 1))

        # Compute similarity of augmentations with respect to the
        # activations of the original data
        a_nodaug_da = da.repeat(da.reshape(a_nodaug_da,
                                           a_nodaug_da.shape + (1, )),
                                repeats=n_daug_rep, axis=2)
        a_nodaug_da = da.rechunk(a_nodaug_da, (chunk_size, dim_activations, 1))
        mse_daug = da.mean(da.square(a_nodaug_da - a_daug_da), axis=1)

        # Compute invariance score
        mse_sum = da.repeat(da.reshape(da.sum(mse_matrix, axis=1),
                                       (n_images, 1)),
                            repeats=n_daug_rep, axis=1)
        mse_sum = da.rechunk(mse_sum, (chunk_size, 1))
        invariance = 1 - n_images * da.divide(mse_daug, mse_sum)

        print('Dimensionality activations: {}x{}x{}'.format(
            n_images, dim_activations, n_daug_rep))

        # Store HDF5 file
        if FLAGS.output_mse_matrix_hdf5:
            mse_matrix_ds = hdf5_layer.create_dataset(
                'mse_matrix', shape=mse_matrix.shape,
                chunks=mse_matrix.chunksize, dtype=K.floatx())
            mse_daug_ds = hdf5_layer.create_dataset(
                'mse_daug', shape=mse_daug.shape,
                chunks=mse_daug.chunksize, dtype=K.floatx())
            invariance_ds = hdf5_layer.create_dataset(
                'invariance', shape=invariance.shape,
                chunks=invariance.chunksize, dtype=K.floatx())
            time_init = time()
            with ProgressBar(dt=1):
                da.store([mse_matrix, mse_daug, invariance],
                         [mse_matrix_ds, mse_daug_ds, invariance_ds])
            time_end = time()
            print('Elapsed time: {}'.format(time_end - time_init))

            invariance = np.ravel(
                np.asarray(output_hdf5[layer_name]['invariance']))
        else:
            time_init = time()
            invariance = da.ravel(invariance).compute()
            time_end = time()
            print('Elapsed time: {}'.format(time_end - time_init))

        # Update pandas data frame for plotting
        df_end_idx = df_init_idx + n_images * n_daug_rep
        d = pd.DataFrame({'Layer': layer_name,
                          'sample': np.repeat(np.arange(n_images), n_daug_rep),
                          'n_daug': np.tile(np.arange(n_daug_rep), n_images),
                          'invariance': invariance},
                         index=np.arange(df_init_idx, df_end_idx).tolist())
        df = df.append(d)
        df_init_idx += df_end_idx

    pickle.dump(df, open(output_pickle, 'wb'))
    output_hdf5.close()
def calibration_double_ended_wls(ds, st_label, ast_label, rst_label,
                                 rast_label, st_var, ast_var, rst_var,
                                 rast_var, calc_cov=True, solver='sparse',
                                 dtype32=False):
    """
    Parameters
    ----------
    ds : DataStore
    st_label
    ast_label
    rst_label
    rast_label
    st_var
    ast_var
    rst_var
    rast_var
    calc_cov
    solver : {'sparse', 'stats'}

    Returns
    -------
    """
    # x_alpha_set_zero=0.,  # set one alpha for all times to zero
    # x_alpha_set_zeroi = np.argmin(np.abs(ds.x.data - x_alpha_set_zero))
    # x_alpha_set_zeroidata = np.arange(nt) * no + x_alpha_set_zeroi

    cal_ref = ds.ufunc_per_section(label=st_label, ref_temp_broadcasted=True,
                                   calc_per='all')

    st = ds.ufunc_per_section(label=st_label, calc_per='all')
    ast = ds.ufunc_per_section(label=ast_label, calc_per='all')
    rst = ds.ufunc_per_section(label=rst_label, calc_per='all')
    rast = ds.ufunc_per_section(label=rast_label, calc_per='all')
    z = ds.ufunc_per_section(label='x', calc_per='all')

    nx = z.size

    _xsorted = np.argsort(ds.x.data)
    _ypos = np.searchsorted(ds.x.data[_xsorted], z)
    x_index = _xsorted[_ypos]

    no, nt = ds[st_label].data.shape

    p0_est = np.asarray([482., 0.1] + nt * [1.4] + no * [0.])

    # Data for F and B temperature, 2 * nt * nx items
    data1 = da.repeat(1 / (cal_ref.T.ravel() + 273.15), 2)  # gamma

    # data2 = da.tile(np.array([0., -1.]), nt * nx)  # alphaint
    data2 = da.stack((da.zeros(nt * nx, chunks=nt * nx),
                      -da.ones(nt * nx, chunks=nt * nx))).T.ravel()

    # data3 = da.tile(np.array([-1., -1.]), nt * nx)  # C
    data3 = -da.ones(2 * nt * nx, chunks=2 * nt * nx)

    # data5 = da.tile(np.array([-1., 1.]), nt * nx)  # alph
    data5 = da.stack((-da.ones(nt * nx, chunks=nt * nx),
                      da.ones(nt * nx, chunks=nt * nx))).T.ravel()

    # Data for alpha, nt * no items
    # data6 = da.repeat(np.array([-0.5]), nt * no)  # alphaint
    data6 = da.ones(nt * no, dtype=float, chunks=(nt * no, )) * -0.5  # alphaint
    data9 = da.ones(nt * no, dtype=float, chunks=(nt * no, ))  # alpha

    # alpha should start at zero. But then the sparse solver crashes
    # data9[x_alpha_set_zeroidata] = 0.

    data = da.concatenate([data1, data2, data3, data5, data6, data9]).compute()

    # Coords (irow, icol)
    coord1row = da.arange(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # gamma
    coord2row = da.arange(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # alphaint
    coord3row = da.arange(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # C
    coord5row = da.arange(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # alpha

    coord6row = da.arange(2 * nt * nx, 2 * nt * nx + nt * no, dtype=int,
                          chunks=(nt * no, ))  # alphaint
    coord9row = da.arange(2 * nt * nx, 2 * nt * nx + nt * no, dtype=int,
                          chunks=(nt * no, ))  # alpha

    coord1col = da.zeros(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # gamma
    coord2col = da.ones(2 * nt * nx, dtype=int,
                        chunks=(nt * nx, )) * (2 + nt + no - 1)  # alphaint
    coord3col = da.repeat(da.arange(nt, dtype=int, chunks=(nt, )) + 2,
                          2 * nx).rechunk(nt * nx)  # C
    coord5col = da.tile(np.repeat(x_index, 2) + nt + 2,
                        nt).rechunk(nt * nx)  # alpha

    coord6col = da.ones(nt * no, dtype=int,
                        chunks=(nt * no, ))  # * (2 + nt + no - 1)  # alphaint
    coord9col = da.tile(da.arange(no, dtype=int, chunks=(nt * no, )) + nt + 2,
                        nt)  # alpha

    rows = [coord1row, coord2row, coord3row, coord5row, coord6row, coord9row]
    cols = [coord1col, coord2col, coord3col, coord5col, coord6col, coord9col]
    coords = (da.concatenate(rows).compute(), da.concatenate(cols).compute())

    # try scipy.sparse.bsr_matrix
    X = sp.coo_matrix((data, coords),
                      shape=(2 * nx * nt + nt * no, nt + 2 + no),
                      dtype=float, copy=False)

    # Spooky way to interleave and ravel arrays in correct order. Works!
    y1F = da.log(st / ast).T.ravel()
    y1B = da.log(rst / rast).T.ravel()
    y1 = da.stack([y1F, y1B]).T.ravel()

    y2F = da.log(ds[st_label].data / ds[ast_label].data).T.ravel()
    y2B = da.log(ds[rst_label].data / ds[rast_label].data).T.ravel()
    y2 = (y2B - y2F) / 2

    y = da.concatenate([y1, y2]).compute()

    # Calculate the reciprocal of the variance (not std)
    w1F = (1 / st**2 * st_var + 1 / ast**2 * ast_var).T.ravel()
    w1B = (1 / rst**2 * rst_var + 1 / rast**2 * rast_var).T.ravel()
    w1 = da.stack([w1F, w1B]).T.ravel()

    w2 = (0.5 / ds[st_label].data**2 * st_var +
          0.5 / ds[ast_label].data**2 * ast_var +
          0.5 / ds[rst_label].data**2 * rst_var +
          0.5 / ds[rast_label].data**2 * rast_var).T.ravel()
    w = da.concatenate([w1, w2]).compute()

    if solver == 'sparse':
        p_sol, p_var, p_cov = wls_sparse(X, y, w=w, x0=p0_est,
                                         calc_cov=calc_cov, dtype32=dtype32)
    elif solver == 'stats':
        p_sol, p_var, p_cov = wls_stats(X, y, w=w, calc_cov=calc_cov)

    if calc_cov:
        return nt, z, p_sol, p_var, p_cov
    else:
        return nt, z, p_sol, p_var
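# A small sketch of the "spooky" interleaving trick used above: stacking two 1-D arrays
# and ravelling the transpose alternates their elements. Values are illustrative only.
import numpy as np
import dask.array as da

forward = da.from_array(np.array([1.0, 2.0, 3.0]), chunks=3)
backward = da.from_array(np.array([10.0, 20.0, 30.0]), chunks=3)
interleaved = da.stack([forward, backward]).T.ravel()
assert np.allclose(interleaved.compute(), [1.0, 10.0, 2.0, 20.0, 3.0, 30.0])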
def read_orig_file_from_wrf(filename):
    keep_indx = np.load('target_cells_index.npy')
    reader = csv.reader(open("surrouding_cells.csv", "r"))
    surr_arr = []
    for row in reader:
        this_arr = list(map(int, row[1].strip(']|[').split(',')))
        surr_arr.append(this_arr)
    surr_arr = np.array(surr_arr)[keep_indx, :]

    data = nc.Dataset(filename)
    labels = []
    for variable in data.variables:
        # print(variable + ":" + str(data[variable].shape))
        labels.append(variable)
    for i in range(0, 24):
        labels.append('anthro_surr_emis_{:02d}'.format(i))
        labels.append('lightning_surr_emis_{:02d}'.format(i))
    labels.append('total_lightning')
    data_dict = {label: [] for label in labels}

    extra_vars = ['xlon', 'xlat', 'hour', 'date', 'IC_FLASHCOUNT',
                  'CG_FLASHCOUNT', 'E_NO', 'U', 'V']
    # print('Variables require extra processing steps: {}'.format(extra_vars[:]))
    dims = data['no2'].shape
    ntime = dims[0] - 1
    ngrid = len(keep_indx)
    nvel = dims[2]

    data_hours = da.array(data['hour'][1:], dtype='float32')
    data_dict['hour'] = da.repeat(da.repeat(data_hours[:, :, np.newaxis], ngrid, axis=1),
                                  nvel, axis=2)

    xlon = da.array(data['xlon'][:], dtype='float32').flatten()[np.newaxis, keep_indx, np.newaxis]
    data_dict['xlon'] = da.repeat(da.repeat(xlon, ntime, axis=0), nvel, axis=2)
    xlat = da.array(data['xlat'][:], dtype='float32').flatten()[np.newaxis, keep_indx, np.newaxis]
    data_dict['xlat'] = da.repeat(da.repeat(xlat, ntime, axis=0), nvel, axis=2)

    data_dict['date'] = da.zeros((ntime, ngrid, nvel)) + da.mean(data['date'][:], dtype='float32')
    data_dict['date'] = data_dict['date']

    cum_ic_flash = da.array(data['IC_FLASHCOUNT'][:], dtype='float32')
    cum_cg_flash = da.array(data['CG_FLASHCOUNT'][:], dtype='float32')
    ic_flash = da.repeat(cum_ic_flash[1:, :, :] - cum_ic_flash[:-1, :, :], nvel, axis=2)
    cg_flash = da.repeat(cum_cg_flash[1:, :, :] - cum_cg_flash[:-1, :, :], nvel, axis=2)
    e_lightning = ic_flash + cg_flash
    data_dict['IC_FLASHCOUNT'] = ic_flash[:, keep_indx, :]
    data_dict['CG_FLASHCOUNT'] = cg_flash[:, keep_indx, :]
    data_dict['total_lightning'] = e_lightning[:, keep_indx, :]

    e_no_lower = da.array(data['E_NO'], dtype='float32')[1:, :, :]
    e_no_upper = da.zeros((ntime, e_no_lower.shape[1], nvel - e_no_lower.shape[2]),
                          dtype='float32')
    e_no = da.concatenate([e_no_lower, e_no_upper], axis=2)
    data_dict['E_NO'] = e_no[:, keep_indx, :]

    for i in range(0, 24):
        this_label = 'anthro_surr_emis_{:02d}'.format(i)
        surr_indx = surr_arr[:, i]
        data_dict[this_label] = e_no[:, surr_indx, :]
        this_label = 'lightning_surr_emis_{:02d}'.format(i)
        data_dict[this_label] = e_lightning[:, surr_indx, :]

    stg_u = da.array(data['U'], dtype='float32')
    stg_v = da.array(data['V'], dtype='float32')
    u_indx_left, u_indx_right, v_indx_bot, v_indx_up = find_indx_for_wind()
    wind_u = (stg_u[1:, u_indx_left, :] + stg_u[1:, u_indx_right, :]) / 2
    data_dict['U'] = wind_u[:, keep_indx, :]
    wind_v = (stg_v[1:, v_indx_up, :] + stg_v[1:, v_indx_bot, :]) / 2
    data_dict['V'] = wind_v[:, keep_indx, :]

    match_vars = ['no2', 'pres', 'temp', 'CLDFRA']
    # print('Variables read directly from wrf: {}'.format(match_vars[:]))
    for var in match_vars:
        data_dict[var] = da.array(data[var], dtype='float32')[1:, keep_indx, :]

    reduce_dim_vars = ['elev', 'W']
    # print('Variables average vertically: {}'.format(reduce_dim_vars[:]))
    for var in reduce_dim_vars:
        this_value = da.array(data[var], dtype='float32')[1:, keep_indx, :]
        data_dict[var] = (this_value[:, :, 1:] + this_value[:, :, :-1]) / 2

    add_dim_vars = ['COSZEN', 'PBLH', 'LAI', 'HGT', 'SWDOWN', 'GLW']
    # print('Variables add vertical layers: {}'.format(add_dim_vars[:]))
    for var in add_dim_vars:
        this_value = da.array(data[var], dtype='float32')[1:, keep_indx, :]
        data_dict[var] = da.repeat(this_value, nvel, axis=2)

    # print('Key of dict:{}'.format(data_dict.keys()))
    additional_features = ['xlon', 'xlat', 'date', 'elev', 'hour',
                           'IC_FLASHCOUNT', 'CG_FLASHCOUNT']
    y_label = ['no2']
    x_labels = [label for label in labels
                if label not in additional_features and label not in y_label]

    additional_arr = []
    x_arr = []
    y_arr = []
    for var in labels:
        # print('Reading this variable:{}'.format(var))
        this_value = data_dict[var].flatten()
        if var in additional_features:
            additional_arr.append(this_value)
        elif var in x_labels:
            x_arr.append(this_value.compute())
        elif var in y_label:
            y_arr.append(this_value.compute())

    return additional_arr, x_arr, y_arr, x_labels, additional_features
def _get_test_calib_for_channel_vis(self, chroot, meas):
    data = super()._get_test_calib_for_channel_vis(chroot, meas)
    data["state/celestial/earth_sun_distance"] = xr.DataArray(
        da.repeat(da.array([30000000]), 6000))
    return data
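# The two _get_test_calib_for_channel_vis variants above use da.repeat on a length-one
# array to build a constant earth-sun-distance vector. A minimal equivalent sketch,
# with the value taken from the first variant and otherwise illustrative only:
import dask.array as da

distance_km = da.repeat(da.array([149597870.7]), 6000)
assert distance_km.shape == (6000,)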
def activations(images, labels, batch_size, model, layer_regex, nodaug_params,
                daug_params, include_input=False, class_invariance=False,
                n_daug_rep=0, norms=['fro']):
    """
    Computes metrics from the activations, such as the norm of the feature
    maps, data augmentation invariance, class invariance, etc.

    Parameters
    ----------
    images : h5py Dataset
        The set of images
    labels : h5py Dataset
        The ground truth labels
    batch_size : int
        Batch size
    model : Keras Model
        The model
    nodaug_params : dict
        Dictionary of data augmentation parameters for the baseline
    daug_params : dict
        Dictionary of data augmentation parameters
    include_input : bool
        If True, the input layer is considered for the analysis
    class_invariance : bool
        If True, the class invariance score is computed
    n_daug_rep : int
        If larger than 0, the data augmentation invariance score is computed,
        performing n_daug_rep repetitions of random augmentations
    norms : list
        List of keywords to specify the types of norms to compute on the
        activations

    Returns
    -------
    results_dict : dict
        Dictionary containing some performance metrics
    """
    def _update_stats(mean_norm, std_norm, norm):
        mean_norm_batch = np.mean(norm, axis=0)
        std_norm_batch = np.std(norm, axis=0)
        mean_norm = init / float(end) * mean_norm + \
            batch_size / float(end) * mean_norm_batch
        std_norm = init / float(end) * std_norm ** 2 + \
            batch_size / float(end) * std_norm_batch ** 2 + \
            (init * batch_size) / float(end ** 2) * \
            (mean_norm - mean_norm_batch) ** 2
        std_norm = np.sqrt(std_norm)
        return mean_norm, std_norm

    def _frobenius_norm(activations):
        norm = np.linalg.norm(
            activations, ord='fro',
            axis=tuple(range(1, len(activations.shape) - 1)))
        return norm

    def _inf_norm(activations):
        norm = np.max(np.abs(activations),
                      axis=tuple(range(1, len(activations.shape) - 1)))
        return norm

    model = del_extra_nodes(model)

    n_images = images.shape[0]
    n_batches_per_epoch = int(np.ceil(float(n_images) / batch_size))

    # Get relevant layers
    if include_input:
        layer_regex = '({}|.*input.*)'.format(layer_regex)
    else:
        layer_regex = layer_regex

    layers = [layer.name for layer in model.layers
              if re.compile(layer_regex).match(layer.name)]

    # Initialize HDF5 to store the activations
    # filename = 'hdf5_aux_{}'.format(time.time())
    # activations_hdf5_aux = h5py.File(filename, 'w')
    # hdf5_aux = [filename]
    #
    # grp_activations = activations_hdf5_aux.create_group('activations')

    if class_invariance:
        # grp_labels = activations_hdf5_aux.create_group('labels')
        labels_true_da = []
        labels_pred_da = []
        predictions_da = []
        # labels_true = grp_labels.create_dataset(
        #     'labels_true', shape=(n_images, ), dtype=np.uint8)
        # labels_pred = grp_labels.create_dataset(
        #     'labels_pred', shape=(n_images, ), dtype=np.uint8)
        # predictions = grp_labels.create_dataset(
        #     'predictions', shape=labels.shape, dtype=K.floatx())
        idx_softmax = model.output_names.index('softmax')
        store_labels = True
    else:
        store_labels = False

    # Initialize results dictionary
    results_dict = {'activations_norm': {}, 'summary': {},
                    'class_invariance': {}, 'daug_invariance': {}}

    # Iterate over the layers
    for layer_name in layers:

        # Create batch generator
        image_gen = get_generator(images, **nodaug_params)
        batch_gen = generate_batches(image_gen, images, labels, batch_size,
                                     aug_per_im=1, shuffle=False)

        layer = model.get_layer(layer_name)
        layer_shape = layer.output_shape[1:]
        n_channels = layer_shape[-1]

        if re.compile('.*input.*').match(layer_name):
            layer_name = 'input'

        print('\nLayer {}\n'.format(layer_name))

        # Create a Dataset for the activations of the layer
        # activations_layer = grp_activations.create_dataset(
        #     layer_name, shape=(n_images, ) + layer_shape,
        #     dtype=K.floatx())

        # Create dask array for the activations of the layer
        activations_layer_da = []

        # Initialize placeholders in the results dict for the layer
        results_dict['activations_norm'].update(
            {layer_name: {n: {'mean': np.zeros(n_channels),
                              'std': np.zeros(n_channels)} for n in norms}})
        layer_dict = results_dict['activations_norm'][layer_name]

        activation_function = K.function([model.input, K.learning_phase()],
                                         [layer.output])

        # Iterate over the data set in batches
        init = 0
        for batch_images, batch_labels in tqdm(
                batch_gen, total=n_batches_per_epoch):

            batch_size = batch_images.shape[0]
            end = init + batch_size

            # Store labels
            if store_labels:
                preds = model.predict_on_batch(batch_images)
                if isinstance(preds, list):
                    preds = preds[idx_softmax]
                labels_pred_da.append(da.from_array(np.argmax(preds, axis=1)))
                labels_true_da.append(da.from_array(np.argmax(batch_labels, axis=1)))
                predictions_da.append(da.from_array(preds))
                # labels_pred[init:end] = np.argmax(preds, axis=1)
                # labels_true[init:end] = np.argmax(batch_labels, axis=1)
                # predictions[init:end, :] = preds

            # Get and store activations
            activations = activation_function([batch_images, 0])[0]
            activations_layer_da.append(
                da.from_array(activations, chunks=activations.shape))
            # activations_layer[init:end] = activations

            # Compute norms
            for norm_key in norms:
                mean_norm = layer_dict[norm_key]['mean']
                std_norm = layer_dict[norm_key]['std']
                if norm_key == 'fro':
                    norm = _frobenius_norm(activations)
                elif norm_key == 'inf':
                    norm = _inf_norm(activations)
                else:
                    raise NotImplementedError('Implemented norms are fro '
                                              'and inf')
                mean_norm, std_norm = _update_stats(mean_norm, std_norm, norm)
                layer_dict[norm_key]['mean'] = mean_norm
                layer_dict[norm_key]['std'] = std_norm

            init = end
            if init == n_images:
                store_labels = False
                break

        # Concatenate dask arrays
        activations_layer_da = da.concatenate(activations_layer_da, axis=0)
        activations_layer_da = activations_layer_da.reshape((n_images, -1))
        d_activations = activations_layer_da.shape[-1]

        if class_invariance:
            print('\nComputing class invariance\n')
            labels_pred_da = da.concatenate(labels_pred_da)
            labels_true_da = da.concatenate(labels_true_da)
            predictions_da = da.concatenate(predictions_da)
            n_classes = len(np.unique(labels_true_da))

        # Compute MSE matrix of the activations
        r = da.reshape(da.sum(da.square(activations_layer_da), axis=1), (-1, 1))
        mse_matrix_da = (r - 2 * da.dot(activations_layer_da,
                                        da.transpose(activations_layer_da))
                         + da.transpose(r)) / d_activations
        mse_matrix_da = mse_matrix_da.rechunk((mse_matrix_da.chunksize[0],
                                               mse_matrix_da.shape[-1]))

        # Compute class invariance
        time0 = time()
        results_dict['class_invariance'].update({layer_name: {}})
        class_invariance_scores_da = []
        if class_invariance:
            # mse_matrix_mean = da.mean(mse_matrix_da).compute()
            for cl in tqdm(range(n_classes)):
                labels_cl = labels_pred_da == cl
                labels_cl = labels_cl.compute()
                mse_class = mse_matrix_da[labels_cl, :][:, labels_cl]
                mse_class = mse_class.rechunk((-1, -1))
                # mse_class_mean = da.mean(mse_class).compute()
                # class_invariance_score = 1. - np.divide(
                #     mse_class_mean, mse_matrix_mean)
                # results_dict['class_invariance'][layer_name].update(
                #     {cl: class_invariance_score})
                class_invariance_scores_da.append(
                    1. - da.divide(da.mean(mse_class), da.mean(mse_matrix_da)))

        # Compute data augmentation invariance
        print('\nComputing data augmentation invariance\n')
        mse_daug_da = []
        results_dict['daug_invariance'].update({layer_name: {}})

        for r in range(n_daug_rep):
            print('Repetition {}'.format(r))

            image_gen_daug = get_generator(images, **daug_params)
            batch_gen_daug = generate_batches(image_gen_daug, images, labels,
                                              batch_size, aug_per_im=1,
                                              shuffle=False)

            activations_layer_daug_da = []

            # Iterate over the data set in batches to compute activations
            init = 0
            for batch_images, batch_labels in tqdm(
                    batch_gen, total=n_batches_per_epoch):

                batch_size = batch_images.shape[0]
                end = init + batch_size

                # Get and store activations
                activations = activation_function([batch_images, 0])[0]
                activations_layer_daug_da.append(
                    da.from_array(activations, chunks=activations.shape))

                init = end
                if init == n_images:
                    break

            activations_layer_daug_da = da.concatenate(
                activations_layer_daug_da, axis=0)
            activations_layer_daug_da = activations_layer_daug_da.reshape(
                (n_images, -1))
            activations_layer_daug_da = activations_layer_daug_da.rechunk(
                (activations_layer_daug_da.chunksize[0],
                 activations_layer_daug_da.shape[-1]))

            # Compute MSE daug
            mse_daug_da.append(da.mean(
                da.square(activations_layer_da - activations_layer_daug_da),
                axis=1))

        mse_daug_da = da.stack(mse_daug_da, axis=1)

        mse_sum = da.repeat(da.reshape(da.sum(mse_matrix_da, axis=1),
                                       (n_images, 1)),
                            n_daug_rep, axis=1)

        daug_invariance_score_da = 1 - n_images * da.divide(mse_daug_da, mse_sum)

        time1 = time()

        # Compute dask results and update results dict
        results_dask = da.compute(class_invariance_scores_da,
                                  daug_invariance_score_da)

        time2 = time()

        results_dict['class_invariance'][layer_name].update(
            {cl: cl_inv_score for cl, cl_inv_score
             in enumerate(results_dask[0])})
        results_dict['daug_invariance'].update(
            {layer_name: {r: daug_inv_score for r, daug_inv_score
                          in enumerate(results_dask[1].T)}})

    # Compute summary statistics of the norms across the channels
    for layer, layer_dict in results_dict['activations_norm'].items():
        results_dict['summary'].update({layer: {}})
        for norm_key, norm_dict in layer_dict.items():
            results_dict['summary'][layer].update({norm_key: {
                'mean': np.mean(norm_dict['mean']),
                'std': np.mean(norm_dict['std'])}})

    return results_dict
def _expand_tiepoint_array_1km(self, arr, lines, cols):
    arr = da.repeat(arr, lines, axis=1)
    arr = da.concatenate(
        (arr[:, :lines//2, :], arr, arr[:, -(lines//2):, :]), axis=1)
    arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)), cols, axis=1)
    return da.hstack((arr, arr[:, -cols:]))
def test_write_bw_colormap():
    """Test saving a BW image with a colormap.

    Albedo with a colormap. Reflectances are 0, 29.76, 60, 90.24, 120.
    """
    area = STEREOGRAPHIC_AREA
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', '%'), ('name', '1'), ('level', None),
                  ('modifiers', ()), ('wavelength', (0.5, 0.6, 0.7)),
                  ('calibration', 'reflectance'),
                  ('start_time', TIME - datetime.timedelta(minutes=75)),
                  ('end_time', TIME - datetime.timedelta(minutes=70)),
                  ('area', area), ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    cm_vis = [0, 4095, 5887, 7167, 8191, 9215, 9983, 10751, 11519, 12287,
              12799, 13567, 14079, 14847, 15359, 15871, 16383, 16895, 17407, 17919,
              18175, 18687, 19199, 19711, 19967, 20479, 20735, 21247, 21503, 22015,
              22271, 22783, 23039, 23551, 23807, 24063, 24575, 24831, 25087, 25599,
              25855, 26111, 26367, 26879, 27135, 27391, 27647, 27903, 28415, 28671,
              28927, 29183, 29439, 29695, 29951, 30207, 30463, 30975, 31231, 31487,
              31743, 31999, 32255, 32511, 32767, 33023, 33279, 33535, 33791, 34047,
              34303, 34559, 34559, 34815, 35071, 35327, 35583, 35839, 36095, 36351,
              36607, 36863, 37119, 37119, 37375, 37631, 37887, 38143, 38399, 38655,
              38655, 38911, 39167, 39423, 39679, 39935, 39935, 40191, 40447, 40703,
              40959, 40959, 41215, 41471, 41727, 41983, 41983, 42239, 42495, 42751,
              42751, 43007, 43263, 43519, 43519, 43775, 44031, 44287, 44287, 44543,
              44799, 45055, 45055, 45311, 45567, 45823, 45823, 46079, 46335, 46335,
              46591, 46847, 46847, 47103, 47359, 47615, 47615, 47871, 48127, 48127,
              48383, 48639, 48639, 48895, 49151, 49151, 49407, 49663, 49663, 49919,
              50175, 50175, 50431, 50687, 50687, 50943, 50943, 51199, 51455, 51455,
              51711, 51967, 51967, 52223, 52223, 52479, 52735, 52735, 52991, 53247,
              53247, 53503, 53503, 53759, 54015, 54015, 54271, 54271, 54527, 54783,
              54783, 55039, 55039, 55295, 55551, 55551, 55807, 55807, 56063, 56319,
              56319, 56575, 56575, 56831, 56831, 57087, 57343, 57343, 57599, 57599,
              57855, 57855, 58111, 58367, 58367, 58623, 58623, 58879, 58879, 59135,
              59135, 59391, 59647, 59647, 59903, 59903, 60159, 60159, 60415, 60415,
              60671, 60671, 60927, 60927, 61183, 61439, 61439, 61695, 61695, 61951,
              61951, 62207, 62207, 62463, 62463, 62719, 62719, 62975, 62975, 63231,
              63231, 63487, 63487, 63743, 63743, 63999, 63999, 64255, 64255, 64511,
              64511, 64767, 64767, 65023, 65023, 65279]

    kwargs = {'ch_min_measurement_unit': np.array([0]),
              'ch_max_measurement_unit': np.array([120]),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 100015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': '%', 'nbits': 8, 'cmap': [cm_vis] * 3}

    data = da.tile(da.repeat(da.arange(5, chunks=1024) / 4.0, 205)[:-1],
                   1024).reshape((1, 1024, 1024))[:, :1024]
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        colormap, res = _load_file_values_with_colormap(filename)

        assert(len(colormap) == 768)
        assert(np.allclose(colormap[:256], cm_vis))
        assert(np.allclose(colormap[256:512], cm_vis))
        assert(np.allclose(colormap[512:], cm_vis))
        assert(np.allclose(res[0, ::205], np.array([1, 64, 128, 192, 255])))
def _expand_tiepoint_array_5km(self, arr, lines, cols):
    arr = da.repeat(arr, lines * 2, axis=1)
    arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)), cols, axis=1)
    return da.hstack((arr[:, :2], arr, arr[:, -2:]))
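# A minimal standalone sketch of the padding step shared by the
# _expand_tiepoint_array_5km variants above: after the repeats, the first and last two
# columns are duplicated with da.hstack to extrapolate towards the full swath width.
# The toy array below is illustrative only.
import dask.array as da

expanded = da.arange(12, chunks=12).reshape(3, 4)
padded = da.hstack((expanded[:, :2], expanded, expanded[:, -2:]))
assert padded.shape == (3, 8)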
positiveInd(iLims2, L))
fStEn2bool = lambda iStEn, length: da.hstack(
    [(da.ones(iEn2iSt, dtype=np.bool8) if b else da.zeros(iEn2iSt, dtype=np.bool8))
     for iEn2iSt, b in da.vstack((
         da.diff(da.hstack((0, iStEn.flat, length))),
         da.hstack((da.repeat([(False, True)], np.size(iStEn, 0), 0).flat, False))
     )).T])
TimeShift_Log_sec = 60
kVabs = np.float64([[0.361570991503], [0]])
# @-<<Castom defenitions>>
# @+<<loading>>
# @+node:korzh.20180525121734.1: ** <<loading>>
# @+others
# @+node:korzh.20180526160931.1: *3* coef
"""
Load or set default
def test_write_bw_fill():
    """Test saving a BW image with transparency."""
    from pyninjotiff.ninjotiff import save
    from pyninjotiff.tifffile import TiffFile

    area = FakeArea({'ellps': 'WGS84', 'lat_0': 90.0, 'lat_ts': 60.0,
                     'lon_0': 0.0, 'proj': 'stere'},
                    (-1000000.0, -4500000.0, 2072000.0, -1428000.0),
                    1024, 1024)
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', '%'), ('name', '1'), ('level', None),
                  ('modifiers', ()), ('wavelength', (0.5, 0.6, 0.7)),
                  ('calibration', 'reflectance'),
                  ('start_time', TIME - datetime.timedelta(minutes=25)),
                  ('end_time', TIME - datetime.timedelta(minutes=20)),
                  ('area', area), ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    kwargs = {'ch_min_measurement_unit': np.array([0]),
              'ch_max_measurement_unit': np.array([120]),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 100015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': '%', 'nbits': 8}

    data1 = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                    256).reshape((1, 256, 1024))
    datanan = da.ones((1, 256, 1024), chunks=1024) * np.nan
    data2 = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                    512).reshape((1, 512, 1024))
    data = da.concatenate((data1, datanan, data2), axis=1)
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value
        for i in range(3):
            assert (np.all(np.array(colormap[i * 256:(i + 1) * 256]) ==
                           np.arange(256) * 256))
        assert (np.all(res[0, ::256] == np.array([1, 86, 170, 255])))
        assert (np.all(res[256, :] == 0))
def test_write_ir_colormap():
    """Test saving a IR image with a colormap.

    IR with a colormap. Temperatures are -70, -40.24, -10, 20.24, 50.
    """
    from pyninjotiff.ninjotiff import save
    from pyninjotiff.tifffile import TiffFile

    area = FakeArea({'ellps': 'WGS84', 'lat_0': 90.0, 'lat_ts': 60.0,
                     'lon_0': 0.0, 'proj': 'stere'},
                    (-1000000.0, -4500000.0, 2072000.0, -1428000.0),
                    1024, 1024)
    scale = 1.0 / 120
    offset = 70.0 / 120
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', 'K'), ('name', '4'), ('level', None),
                  ('modifiers', ()), ('wavelength', (10.3, 10.8, 11.3)),
                  ('calibration', 'brightness_temperature'),
                  ('start_time', TIME - datetime.timedelta(minutes=85)),
                  ('end_time', TIME - datetime.timedelta(minutes=80)),
                  ('area', area), ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    ir_map = [255, 1535, 2559, 3327, 4095, 4863, 5375, 5887, 6399, 6911,
              7423, 7935, 8447, 8959, 9471, 9983, 10239, 10751, 11263, 11519,
              12031, 12287, 12799, 13055, 13567, 13823, 14335, 14591, 14847, 15359,
              15615, 16127, 16383, 16639, 17151, 17407, 17663, 17919, 18431, 18687,
              18943, 19199, 19711, 19967, 20223, 20479, 20735, 21247, 21503, 21759,
              22015, 22271, 22527, 22783, 23295, 23551, 23807, 24063, 24319, 24575,
              24831, 25087, 25343, 25599, 25855, 26367, 26623, 26879, 27135, 27391,
              27647, 27903, 28159, 28415, 28671, 28927, 29183, 29439, 29695, 29951,
              30207, 30463, 30719, 30975, 31231, 31487, 31743, 31999, 31999, 32255,
              32511, 32767, 33023, 33279, 33535, 33791, 34047, 34303, 34559, 34815,
              35071, 35327, 35327, 35583, 35839, 36095, 36351, 36607, 36863, 37119,
              37375, 37375, 37631, 37887, 38143, 38399, 38655, 38911, 39167, 39167,
              39423, 39679, 39935, 40191, 40447, 40703, 40703, 40959, 41215, 41471,
              41727, 41983, 41983, 42239, 42495, 42751, 43007, 43263, 43263, 43519,
              43775, 44031, 44287, 44287, 44543, 44799, 45055, 45311, 45311, 45567,
              45823, 46079, 46335, 46335, 46591, 46847, 47103, 47359, 47359, 47615,
              47871, 48127, 48127, 48383, 48639, 48895, 49151, 49151, 49407, 49663,
              49919, 49919, 50175, 50431, 50687, 50687, 50943, 51199, 51455, 51455,
              51711, 51967, 52223, 52223, 52479, 52735, 52991, 52991, 53247, 53503,
              53759, 53759, 54015, 54271, 54527, 54527, 54783, 55039, 55039, 55295,
              55551, 55807, 55807, 56063, 56319, 56319, 56575, 56831, 57087, 57087,
              57343, 57599, 57599, 57855, 58111, 58367, 58367, 58623, 58879, 58879,
              59135, 59391, 59391, 59647, 59903, 60159, 60159, 60415, 60671, 60671,
              60927, 61183, 61183, 61439, 61695, 61695, 61951, 62207, 62463, 62463,
              62719, 62975, 62975, 63231, 63487, 63487, 63743, 63999, 63999, 64255,
              64511, 64511, 64767, 65023, 65023, 65279]

    kwargs = {'ch_min_measurement_unit': np.array([-70]),
              'ch_max_measurement_unit': np.array([50]),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 900015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': 'C', 'nbits': 8, 'cmap': [ir_map] * 3}

    data = da.tile(da.repeat(da.arange(5, chunks=1024) / 4.0, 205)[:-1],
                   1024).reshape((1, 1024, 1024))[:, :1024]
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value

        assert (len(colormap) == 768)
        assert (np.allclose(colormap[:256], ir_map))
        assert (np.allclose(colormap[256:512], ir_map))
        assert (np.allclose(colormap[512:], ir_map))
        assert (np.allclose(res[0, ::205], np.array([1, 64, 128, 192, 255])))