def _get_ts_dask(pt_1, pt_2, pt_3, pt_4, out_x, out_y):
    """Calculate vertical and horizontal fractional distances t and s."""
    # General case, i.e. where the corners form an irregular rectangle
    t__, s__ = _get_ts_irregular_dask(pt_1, pt_2, pt_3, pt_4, out_y, out_x)

    # Cases where verticals are parallel
    idxs = da.isnan(t__) | da.isnan(s__)
    # Remove extra dimensions
    idxs = da.ravel(idxs)

    if da.any(idxs):
        t_new, s_new = _get_ts_uprights_parallel_dask(pt_1, pt_2,
                                                      pt_3, pt_4,
                                                      out_y, out_x)
        t__ = da.where(idxs, t_new, t__)
        s__ = da.where(idxs, s_new, s__)

    # Cases where both verticals and horizontals are parallel
    idxs = da.isnan(t__) | da.isnan(s__)
    # Remove extra dimensions
    idxs = da.ravel(idxs)
    if da.any(idxs):
        t_new, s_new = _get_ts_parallellogram_dask(pt_1, pt_2, pt_3,
                                                   out_y, out_x)
        t__ = da.where(idxs, t_new, t__)
        s__ = da.where(idxs, s_new, s__)

    # Invalidate fractional distances that fall outside the unit square
    idxs = (t__ < 0) | (t__ > 1) | (s__ < 0) | (s__ > 1)
    t__ = da.where(idxs, np.nan, t__)
    s__ = da.where(idxs, np.nan, s__)

    return t__, s__
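# The fractional distances t__ and s__ computed above plug directly into the
# bilinear weighting used later in get_sample_from_bil_info.  A minimal
# sketch of that weighting with illustrative corner values p_1..p_4 (the
# values here are made up; only the formula comes from the source):
import numpy as np
import dask.array as da

p_1, p_2, p_3, p_4 = (da.from_array(np.array([v]))
                      for v in (0.0, 1.0, 2.0, 3.0))
t__ = da.from_array(np.array([0.25]))
s__ = da.from_array(np.array([0.5]))
res = (p_1 * (1 - s__) * (1 - t__) + p_2 * s__ * (1 - t__) +
       p_3 * (1 - s__) * t__ + p_4 * s__ * t__)
print(res.compute())  # -> [1.0] here: a weighted mix of the four corners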
def test_get_corner_dask(self):
    """Test finding the closest corners."""
    import dask.array as da
    from pyresample.bilinear.xarr import (_get_corner_dask,
                                          _get_input_xy_dask)
    from pyresample import CHUNK_SIZE
    from pyresample._spatial_mp import Proj

    proj = Proj(self.target_def.proj_str)
    in_x, in_y = _get_input_xy_dask(self.source_def, proj,
                                    da.from_array(self.valid_input_index),
                                    da.from_array(self.index_array))
    out_x, out_y = self.target_def.get_proj_coords(chunks=CHUNK_SIZE)
    out_x = da.ravel(out_x)
    out_y = da.ravel(out_y)

    # Some copy&paste from the code to get the input
    out_x_tile = np.reshape(np.tile(out_x, self.neighbours),
                            (self.neighbours, out_x.size)).T
    out_y_tile = np.reshape(np.tile(out_y, self.neighbours),
                            (self.neighbours, out_y.size)).T
    x_diff = out_x_tile - in_x
    y_diff = out_y_tile - in_y
    stride = np.arange(x_diff.shape[0])

    # Use lower left source pixels for testing
    valid = (x_diff > 0) & (y_diff > 0)
    x_3, y_3, idx_3 = _get_corner_dask(stride, valid, in_x, in_y,
                                       da.from_array(self.index_array))

    self.assertTrue(x_3.shape == y_3.shape == idx_3.shape ==
                    (self.target_def.size, ))

    # Four locations have no data to the lower left of them (the
    # bottom row of the area)
    self.assertEqual(np.sum(np.isnan(x_3.compute())), 4)
def _get_input_xy_dask(source_geo_def, proj, input_idxs, idx_ref):
    """Get x/y coordinates for the input area and reduce the data."""
    in_lons, in_lats = source_geo_def.get_lonlats_dask()

    # Mask invalid values
    in_lons, in_lats = _mask_coordinates_dask(in_lons, in_lats)

    # Select valid locations
    # TODO: direct indexing w/o .compute() results in
    # "ValueError: object too deep for desired array"
    in_lons = da.ravel(in_lons)
    in_lons = in_lons.compute()
    in_lons = in_lons[input_idxs]
    in_lats = da.ravel(in_lats)
    in_lats = in_lats.compute()
    in_lats = in_lats[input_idxs]

    # Expand input coordinates for each output location
    in_lons = in_lons[idx_ref]
    in_lats = in_lats[idx_ref]

    # Convert coordinates to output projection x/y space
    in_x, in_y = proj(in_lons, in_lats)

    return in_x, in_y
def test_get_bounding_corners_dask(self):
    """Test finding surrounding bounding corners."""
    import dask.array as da
    from pyresample.bilinear.xarr import (_get_input_xy_dask,
                                          _get_bounding_corners_dask)
    from pyresample._spatial_mp import Proj
    from pyresample import CHUNK_SIZE

    proj = Proj(self.target_def.proj_str)
    out_x, out_y = self.target_def.get_proj_coords(chunks=CHUNK_SIZE)
    out_x = da.ravel(out_x)
    out_y = da.ravel(out_y)
    in_x, in_y = _get_input_xy_dask(self.source_def, proj,
                                    da.from_array(self.valid_input_index),
                                    da.from_array(self.index_array))
    pt_1, pt_2, pt_3, pt_4, ia_ = _get_bounding_corners_dask(
        in_x, in_y, out_x, out_y,
        self.neighbours,
        da.from_array(self.index_array))

    self.assertTrue(pt_1.shape == pt_2.shape ==
                    pt_3.shape == pt_4.shape ==
                    (self.target_def.size, 2))
    self.assertTrue(ia_.shape == (self.target_def.size, 4))

    # Check which of the locations have four valid X/Y pairs by
    # finding where there are non-NaN values
    res = da.sum(pt_1 + pt_2 + pt_3 + pt_4, axis=1).compute()
    self.assertEqual(np.sum(~np.isnan(res)), 10)
def _get_raveled_lonlats(geo_def):
    """Get and validate the raveled lon/lat arrays of a geometry definition."""
    lons, lats = geo_def.get_lonlats(chunks=CHUNK_SIZE)
    if lons.size == 0 or lats.size == 0:
        raise ValueError('Cannot resample empty data set')
    elif lons.size != lats.size or lons.shape != lats.shape:
        raise ValueError('Mismatch between lons and lats')

    return da.ravel(lons), da.ravel(lats)
def test_ravel():
    x = np.random.randint(10, size=(4, 6))

    # 2d
    # these should use the shortcut
    for chunks in [(4, 6), (2, 6)]:
        a = from_array(x, chunks=chunks)
        assert eq(x.ravel(), a.ravel())
        assert len(a.ravel().dask) == len(a.dask) + len(a.chunks[0])
    # these cannot
    for chunks in [(4, 2), (2, 2)]:
        a = from_array(x, chunks=chunks)
        assert eq(x.ravel(), a.ravel())
        assert len(a.ravel().dask) > len(a.dask) + len(a.chunks[0])

    # 0d
    assert eq(x[0, 0].ravel(), a[0, 0].ravel())

    # 1d
    a_flat = a.ravel()
    assert a_flat.ravel() is a_flat

    # 3d
    x = np.random.randint(10, size=(2, 3, 4))
    for chunks in [2, 4, (2, 3, 2), (1, 3, 4)]:
        a = from_array(x, chunks=chunks)
        assert eq(x.ravel(), a.ravel())
        assert eq(x.flatten(), a.flatten())

    assert eq(np.ravel(x), da.ravel(a))
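# A minimal, self-contained illustration of what the test above checks:
# da.ravel flattens lazily and matches np.ravel once computed.  When the
# trailing axis is unchunked (e.g. chunks=(2, 6) on a (4, 6) array), each
# input chunk maps straight onto a slice of the flat output, which appears
# to be the "shortcut" whose task counts the test asserts on.
import numpy as np
import dask.array as da

x = np.arange(24).reshape(4, 6)
a = da.from_array(x, chunks=(2, 6))
flat = da.ravel(a)            # still lazy, shape (24,)
assert (flat.compute() == np.ravel(x)).all()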
def query_no_distance(target_lons, target_lats, valid_output_index,
                      kdtree, neighbours, epsilon, radius):
    """Query the kdtree. No distances are returned."""
    voi = valid_output_index
    voir = da.ravel(voi)
    target_lons_valid = da.ravel(target_lons)[voir]
    target_lats_valid = da.ravel(target_lats)[voir]

    coords = lonlat2xyz(target_lons_valid, target_lats_valid)
    distance_array, index_array = kdtree.query(
        coords.compute(),
        k=neighbours,
        eps=epsilon,
        distance_upper_bound=radius)

    return index_array
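# Why callers mask index_array afterwards: with distance_upper_bound set, a
# KD-tree query reports "no neighbour within radius" by returning an index
# equal to the number of points in the tree (this is documented
# scipy.spatial.cKDTree behaviour; get_bil_info below masks exactly those
# entries).  A small sketch with made-up points:
import numpy as np
from scipy.spatial import cKDTree

pts = np.array([[0.0, 0.0], [1.0, 0.0]])
tree = cKDTree(pts)
dist, idx = tree.query(np.array([[10.0, 10.0]]), k=1,
                       distance_upper_bound=1.0)
print(idx)   # -> [2] == tree.n, i.e. no valid neighbour found
print(dist)  # -> [inf]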
def _create_resample_kdtree(self):
    """Set up kd tree on input."""
    # Get input information
    valid_input_index, source_lons, source_lats = \
        _get_valid_input_index_dask(self.source_geo_def,
                                    self.target_geo_def,
                                    self.reduce_data,
                                    self.radius_of_influence,
                                    nprocs=self.nprocs)

    # FIXME: Is dask smart enough to only compute the pixels we end up
    #        using even with this complicated indexing
    input_coords = lonlat2xyz(source_lons, source_lats)
    valid_input_index = da.ravel(valid_input_index)
    input_coords = input_coords[valid_input_index, :]
    input_coords = input_coords.compute()
    input_coords = input_coords.astype(np.float64)  # np.float is deprecated
    valid_input_index, input_coords = da.compute(valid_input_index,
                                                 input_coords)

    # Build kd-tree on input
    if kd_tree_name == 'pykdtree':
        resample_kdtree = KDTree(input_coords)
    else:
        resample_kdtree = sp.cKDTree(input_coords)

    return valid_input_index, resample_kdtree
def _get_valid_input_index_dask(source_geo_def,
                                target_geo_def,
                                reduce_data,
                                radius_of_influence,
                                nprocs=1):
    """Find indices of reduced input data."""
    source_lons, source_lats = source_geo_def.get_lonlats_dask()
    source_lons = da.ravel(source_lons)
    source_lats = da.ravel(source_lats)

    if source_lons.size == 0 or source_lats.size == 0:
        raise ValueError('Cannot resample empty data set')
    elif source_lons.size != source_lats.size or \
            source_lons.shape != source_lats.shape:
        raise ValueError('Mismatch between lons and lats')

    # Remove illegal values
    valid_input_index = ((source_lons >= -180) & (source_lons <= 180) &
                         (source_lats <= 90) & (source_lats >= -90))

    if reduce_data:
        # Reduce dataset
        if (isinstance(source_geo_def, geometry.CoordinateDefinition) and
            isinstance(target_geo_def, (geometry.GridDefinition,
                                        geometry.AreaDefinition))) or \
           (isinstance(source_geo_def, (geometry.GridDefinition,
                                        geometry.AreaDefinition)) and
            isinstance(target_geo_def, (geometry.GridDefinition,
                                        geometry.AreaDefinition))):
            # Resampling from swath to grid or from grid to grid
            lonlat_boundary = target_geo_def.get_boundary_lonlats()

            # Combine reduced and legal values
            valid_input_index &= \
                data_reduce.get_valid_index_from_lonlat_boundaries(
                    lonlat_boundary[0],
                    lonlat_boundary[1],
                    source_lons, source_lats,
                    radius_of_influence)

    if isinstance(valid_input_index, np.ma.core.MaskedArray):
        # Make sure valid_input_index is not a masked array
        valid_input_index = valid_input_index.filled(False)

    return valid_input_index, source_lons, source_lats
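# The validity mask above is plain elementwise boolean algebra on the raveled
# coordinates.  A tiny illustration with out-of-range values (illustrative
# inputs only):
import numpy as np
import dask.array as da

lons = da.from_array(np.array([0.0, 200.0, -50.0]))
lats = da.from_array(np.array([10.0, 20.0, 95.0]))
valid = ((lons >= -180) & (lons <= 180) & (lats <= 90) & (lats >= -90))
print(valid.compute())  # -> [ True False False]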
def _get_output_xy_dask(target_geo_def, proj):
    """Get x/y coordinates of the target grid."""
    # Read output coordinates
    out_lons, out_lats = target_geo_def.get_lonlats_dask()

    # Mask invalid coordinates
    out_lons, out_lats = _mask_coordinates_dask(out_lons, out_lats)

    # Convert coordinates to output projection x/y space
    res = da.dstack(proj(out_lons.compute(), out_lats.compute()))
    out_x = da.ravel(res[:, :, 0])
    out_y = da.ravel(res[:, :, 1])

    return out_x, out_y
def _check_data_shape_dask(data, input_idxs):
    """Check data shape and adjust if necessary."""
    # Handle multiple datasets
    if data.ndim > 2 and data.shape[0] * data.shape[1] == input_idxs.shape[0]:
        # da.reshape() takes the new shape as a single tuple
        data = da.reshape(data, (data.shape[0] * data.shape[1],
                                 data.shape[2]))
    # Also ravel single dataset
    elif data.shape[0] != input_idxs.size:
        data = da.ravel(data)

    # Ensure two dimensions
    if data.ndim == 1:
        data = da.reshape(data, (data.size, 1))

    return data
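# A quick check of the adjustment logic above; a sketch assuming
# _check_data_shape_dask from above and dask are in scope:
import numpy as np
import dask.array as da

input_idxs = da.from_array(np.ones(12, dtype=bool))
data = da.from_array(np.arange(12).reshape(3, 4))  # shape[0] != input_idxs.size
out = _check_data_shape_dask(data, input_idxs)
print(out.shape)  # -> (12, 1): ravelled, then given an explicit second axis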
def _contains_cftime_datetimes(array) -> bool:
    """Check if an array contains cftime.datetime objects."""
    try:
        from cftime import datetime as cftime_datetime
    except ImportError:
        return False
    else:
        if array.dtype == np.dtype('O') and array.size > 0:
            sample = array.ravel()[0]
            if isinstance(sample, dask_array_type):
                sample = sample.compute()
                if isinstance(sample, np.ndarray):
                    sample = sample.item()
            return isinstance(sample, cftime_datetime)
        else:
            return False
def anomalies(cube, period):
    """
    Compute anomalies using a mean with the specified granularity.

    Computes anomalies based on daily, monthly, seasonal or yearly means
    for the full available period.

    Parameters
    ----------
    cube: iris.cube.Cube
        Input cube.
    period: str
        Period to compute the statistic over.
        Available periods: 'full', 'season', 'seasonal', 'monthly',
        'month', 'mon', 'daily', 'day'

    Returns
    -------
    iris.cube.Cube
        Anomalies cube.
    """
    reference = climate_statistics(cube, period=period)
    if period in ['full']:
        return cube - reference

    cube_coord = _get_period_coord(cube, period)
    ref_coord = _get_period_coord(reference, period)

    data = cube.core_data()
    cube_time = cube.coord('time')
    ref = {}
    for ref_slice in reference.slices_over(ref_coord):
        ref[ref_slice.coord(ref_coord).points[0]] = da.ravel(
            ref_slice.core_data())

    cube_coord_dim = cube.coord_dims(cube_coord)[0]
    for i in range(cube_time.shape[0]):
        time = cube_time.points[i]
        indexes = cube_time.points == time
        indexes = iris.util.broadcast_to_shape(indexes, data.shape,
                                               (cube_coord_dim, ))
        data[indexes] = data[indexes] - ref[cube_coord.points[i]]

    cube = cube.copy(data)
    return cube
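# The core idea above, stripped of the iris machinery: subtract a per-group
# climatological mean from every sample in that group.  A minimal sketch with
# plain dask arrays and an invented "month" grouping (illustrative only, not
# the ESMValTool API):
import numpy as np
import dask.array as da

values = da.from_array(np.array([1.0, 3.0, 2.0, 6.0]))  # two months, two years
months = np.array([1, 2, 1, 2])                          # group key per sample
anoms = values.compute().copy()
for m in np.unique(months):
    sel = months == m
    anoms[sel] -= values[sel].mean().compute()           # remove month-m mean
print(anoms)  # -> [-0.5 -1.5  0.5  1.5]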
def coords_to_point_array(coords: List[Any]) -> np.ndarray:
    """Re-arrange data from a list of xarray coordinates into a 2-d array
    of shape (npoints, ncoords).
    """
    c_chunks = [c.chunks for c in coords]

    if any(chunks is None for chunks in c_chunks):
        # plain numpy arrays (maybe triggers compute)
        X = np.stack([np.ravel(c) for c in coords]).T
    else:
        import dask.array as da

        # TODO: check chunks are equal for all coords?
        X = da.stack([da.ravel(c.data) for c in coords]).T
        X = X.rechunk((X.chunks[0], len(coords)))

    return X
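# Usage sketch for the helper above, assuming it is in scope and xarray with
# dask support is installed: two 2-d chunked coordinates become one lazily
# stacked (npoints, ncoords) array.
import numpy as np
import xarray as xr

lon = xr.DataArray(np.arange(6.0).reshape(2, 3)).chunk({'dim_0': 1})
lat = xr.DataArray(np.arange(6.0, 12.0).reshape(2, 3)).chunk({'dim_0': 1})
X = coords_to_point_array([lon, lat])
print(X.shape)  # -> (6, 2), one row per grid point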
def _create_resample_kdtree(self):
    """Set up kd tree on input."""
    # Get input information
    valid_input_index, source_lons, source_lats = \
        _get_valid_input_index_dask(self.source_geo_def,
                                    self.target_geo_def,
                                    self.reduce_data,
                                    self.radius_of_influence,
                                    nprocs=self.nprocs)

    # FIXME: Is dask smart enough to only compute the pixels we end up
    #        using even with this complicated indexing
    input_coords = lonlat2xyz(source_lons, source_lats)
    valid_input_index = da.ravel(valid_input_index)
    input_coords = input_coords[valid_input_index, :]
    input_coords = input_coords.compute()
    input_coords = input_coords.astype(np.float64)  # np.float is deprecated
    valid_input_index, input_coords = da.compute(valid_input_index,
                                                 input_coords)

    # Build kd-tree on input
    return valid_input_index, KDTree(input_coords)
def test_ravel():
    x = np.random.randint(10, size=(4, 6))

    # 2d
    for chunks in [(4, 6), (2, 6)]:
        a = da.from_array(x, chunks=chunks)
        assert_eq(x.ravel(), a.ravel())
        assert len(a.ravel().dask) == len(a.dask) + len(a.chunks[0])

    # 0d
    assert_eq(x[0, 0].ravel(), a[0, 0].ravel())

    # 1d
    a_flat = a.ravel()
    assert_eq(a_flat.ravel(), a_flat)

    # 3d
    x = np.random.randint(10, size=(2, 3, 4))
    for chunks in [4, (1, 3, 4)]:
        a = da.from_array(x, chunks=chunks)
        assert_eq(x.ravel(), a.ravel())
        assert_eq(x.flatten(), a.flatten())

    assert_eq(np.ravel(x), da.ravel(a))
def main(argv=None):
    # cluster = LocalCluster(dashboard_address=None)
    # client = Client(cluster, memory_limit='{}GB'.format(FLAGS.memory_limit),
    #                 processes=False)

    K.set_floatx('float32')

    chunk_size = FLAGS.chunk_size

    # Read data set
    hdf5_file = h5py.File(FLAGS.data_file, 'r')
    images, labels, _ = hdf52dask(hdf5_file, FLAGS.group, chunk_size,
                                  shuffle=FLAGS.shuffle, seed=FLAGS.seed,
                                  pct=FLAGS.pct)
    n_images = images.shape[0]
    n_batches = int(np.ceil(n_images / float(FLAGS.batch_size)))

    # Data augmentation parameters
    daug_params_file = get_daug_scheme_path(FLAGS.daug_params,
                                            FLAGS.data_file)
    daug_params = yaml.load(open(daug_params_file, 'r'),
                            Loader=yaml.FullLoader)
    nodaug_params_file = get_daug_scheme_path('nodaug.yml', FLAGS.data_file)
    nodaug_params = yaml.load(open(nodaug_params_file, 'r'),
                              Loader=yaml.FullLoader)

    # Initialize the network model
    model_filename = FLAGS.model
    model = load_model(model_filename)

    # Print the model summary
    model.summary()

    # Get relevant layers
    if FLAGS.store_input:
        layer_regex = '({}|.*input.*)'.format(FLAGS.layer_regex)
    else:
        layer_regex = FLAGS.layer_regex

    layers = [layer.name for layer in model.layers
              if re.compile(layer_regex).match(layer.name)]

    # Create batch generators
    n_daug_rep = FLAGS.n_daug_rep
    n_diff_per_batch = int(FLAGS.batch_size / n_daug_rep)
    image_gen_daug = get_generator(images, **daug_params)
    batch_gen_daug = batch_generator(image_gen_daug, images, labels,
                                     batch_size=n_diff_per_batch,
                                     aug_per_im=n_daug_rep, shuffle=False)
    image_gen_nodaug = get_generator(images, **nodaug_params)
    batch_gen_nodaug = batch_generator(image_gen_nodaug, images, labels,
                                       FLAGS.batch_size, aug_per_im=1,
                                       shuffle=False)

    # Outputs
    if FLAGS.output_dir == '-1':
        FLAGS.output_dir = os.path.dirname(FLAGS.model)

    output_hdf5 = h5py.File(os.path.join(FLAGS.output_dir,
                                         FLAGS.output_mse_matrix_hdf5), 'w')
    output_pickle = os.path.join(FLAGS.output_dir, FLAGS.output_pickle)
    df_init_idx = 0
    df = pd.DataFrame()

    # Iterate over the layers
    for layer_idx, layer_name in enumerate(layers):

        # Reload the model
        if layer_idx > 0:
            K.clear_session()
            model = load_model(model_filename)

        layer = model.get_layer(layer_name)

        # Rename input layer
        if re.compile('.*input.*').match(layer_name):
            layer_name = 'input'

        hdf5_layer = output_hdf5.create_group(layer_name)

        activation_function = K.function([model.input,
                                          K.learning_phase()],
                                         [layer.output])

        print('\nComputing pairwise similarity at layer {}'.format(
            layer_name))

        # Compute activations of original data (without augmentation)
        a_nodaug_da = get_activations(activation_function, batch_gen_nodaug)
        a_nodaug_da = da.squeeze(a_nodaug_da)
        a_nodaug_da = da.rechunk(a_nodaug_da,
                                 (chunk_size, ) + a_nodaug_da.shape[1:])
        dim_activations = a_nodaug_da.shape[1]

        # Compute matrix of similarities
        r = da.reshape(da.sum(da.square(a_nodaug_da), axis=1), (-1, 1))
        mse_matrix = (r - 2 * da.dot(a_nodaug_da,
                                     da.transpose(a_nodaug_da))
                      + da.transpose(r)) / dim_activations

        # Compute activations with augmentation
        a_daug_da = get_activations(activation_function, batch_gen_daug)
        a_daug_da = da.rechunk(a_daug_da, (chunk_size, dim_activations, 1))

        # Compute similarity of augmentations with respect to the
        # activations of the original data
        a_nodaug_da = da.repeat(da.reshape(a_nodaug_da,
                                           a_nodaug_da.shape + (1, )),
                                repeats=n_daug_rep, axis=2)
        a_nodaug_da = da.rechunk(a_nodaug_da,
                                 (chunk_size, dim_activations, 1))
        mse_daug = da.mean(da.square(a_nodaug_da - a_daug_da), axis=1)

        # Compute invariance score
        mse_sum = da.repeat(da.reshape(da.sum(mse_matrix, axis=1),
                                       (n_images, 1)),
                            repeats=n_daug_rep, axis=1)
        mse_sum = da.rechunk(mse_sum, (chunk_size, 1))
        invariance = 1 - n_images * da.divide(mse_daug, mse_sum)

        print('Dimensionality activations: {}x{}x{}'.format(
            n_images, dim_activations, n_daug_rep))

        # Store HDF5 file
        if FLAGS.output_mse_matrix_hdf5:
            mse_matrix_ds = hdf5_layer.create_dataset(
                'mse_matrix', shape=mse_matrix.shape,
                chunks=mse_matrix.chunksize, dtype=K.floatx())
            mse_daug_ds = hdf5_layer.create_dataset(
                'mse_daug', shape=mse_daug.shape,
                chunks=mse_daug.chunksize, dtype=K.floatx())
            invariance_ds = hdf5_layer.create_dataset(
                'invariance', shape=invariance.shape,
                chunks=invariance.chunksize, dtype=K.floatx())

            time_init = time()
            with ProgressBar(dt=1):
                da.store([mse_matrix, mse_daug, invariance],
                         [mse_matrix_ds, mse_daug_ds, invariance_ds])
            time_end = time()
            print('Elapsed time: {}'.format(time_end - time_init))

            invariance = np.ravel(
                np.asarray(output_hdf5[layer_name]['invariance']))
        else:
            time_init = time()
            invariance = da.ravel(invariance).compute()
            time_end = time()
            print('Elapsed time: {}'.format(time_end - time_init))

        # Update pandas data frame for plotting
        df_end_idx = df_init_idx + n_images * n_daug_rep
        d = pd.DataFrame({'Layer': layer_name,
                          'sample': np.repeat(np.arange(n_images),
                                              n_daug_rep),
                          'n_daug': np.tile(np.arange(n_daug_rep), n_images),
                          'invariance': invariance},
                         index=np.arange(df_init_idx, df_end_idx).tolist())
        df = df.append(d)
        # Advance to the next contiguous index block
        # (the original `df_init_idx += df_end_idx` skips index ranges)
        df_init_idx = df_end_idx

    pickle.dump(df, open(output_pickle, 'wb'))

    output_hdf5.close()
def get_bil_info(self):
    """Return neighbour info.

    Returns
    -------
    t__ : numpy array
        Vertical fractional distances from corner to the new points
    s__ : numpy array
        Horizontal fractional distances from corner to the new points
    slices
        Slices for selecting the source data
    mask_slices
        Slices for masking invalid source data
    out_coords
        Coordinates of the output grid
    """
    if self.source_geo_def.size < self.neighbours:
        warnings.warn('Searching for %s neighbours in %s data points' %
                      (self.neighbours, self.source_geo_def.size))

    # Create kd-tree
    valid_input_index, resample_kdtree = self._create_resample_kdtree()
    # This is a numpy array
    self.valid_input_index = valid_input_index

    if resample_kdtree.n == 0:
        # Handle if all input data is reduced away
        bilinear_t, bilinear_s, valid_input_index, index_array = \
            _create_empty_bil_info(self.source_geo_def,
                                   self.target_geo_def)
        self.bilinear_t = bilinear_t
        self.bilinear_s = bilinear_s
        self.valid_input_index = valid_input_index
        self.index_array = index_array

        return bilinear_t, bilinear_s, valid_input_index, index_array

    target_lons, target_lats = self.target_geo_def.get_lonlats()
    valid_output_idx = ((target_lons >= -180) & (target_lons <= 180) &
                        (target_lats <= 90) & (target_lats >= -90))

    index_array, distance_array = self._query_resample_kdtree(
        resample_kdtree, target_lons, target_lats, valid_output_idx)

    # Reduce index reference
    input_size = da.sum(self.valid_input_index)
    index_mask = index_array == input_size
    index_array = da.where(index_mask, 0, index_array)

    # Get output projection as pyproj object
    proj = Proj(self.target_geo_def.proj_str)

    # Get output x/y coordinates
    out_x, out_y = self.target_geo_def.get_proj_coords(chunks=CHUNK_SIZE)
    out_x = da.ravel(out_x)
    out_y = da.ravel(out_y)

    # Get input x/y coordinates
    in_x, in_y = _get_input_xy_dask(self.source_geo_def, proj,
                                    self.valid_input_index, index_array)

    # Get the four closest corner points around each output location
    pt_1, pt_2, pt_3, pt_4, index_array = \
        _get_bounding_corners_dask(in_x, in_y, out_x, out_y,
                                   self.neighbours, index_array)

    # Calculate vertical and horizontal fractional distances t and s
    t__, s__ = _get_ts_dask(pt_1, pt_2, pt_3, pt_4, out_x, out_y)
    self.bilinear_t, self.bilinear_s = t__, s__

    self.valid_output_index = valid_output_idx
    self.index_array = index_array
    self.distance_array = distance_array

    self._get_slices()

    return (self.bilinear_t, self.bilinear_s, self.slices,
            self.mask_slices, self.out_coords)
# lat = da.from_array(latitude, chunks=(2030, 1354))
latitude = xr.open_dataset(MOD03_file,
                           drop_variables=var_list)['Latitude'][:, :].values
lat = da.concatenate((lat, latitude), axis=0)

# Reading specific variable 'Longitude'
# longitude = myd03.variables["Longitude"][:,:]
longitude = xr.open_dataset(MOD03_file,
                            drop_variables=var_list)['Longitude'][:, :].values
# lon = da.from_array(longitude, chunks=(2030, 1354))
lon = da.concatenate((lon, longitude), axis=0)

print('Longitude Shape Is: ', lon.shape)
print('Latitude Shape Is: ', lat.shape)

cm = da.ravel(cm)
lat = da.ravel(lat)
lon = da.ravel(lon)

lon = lon.astype(int)
lat = lat.astype(int)
cm = cm.astype(int)

Lat = lat.to_dask_dataframe()
Lon = lon.to_dask_dataframe()
CM = cm.to_dask_dataframe()

df = dd.concat([Lat, Lon, CM], axis=1, interleave_partitions=False)
cols = {0: 'Latitude', 1: 'Longitude', 2: 'CM'}
df = df.rename(columns=cols)
def aggregateOneFileData(M06_file, M03_file):
    print("aggregateOneFileData function with M06_file and M03_file:" +
          M06_file + ", " + M03_file)

    var_list = [
        'Scan Offset', 'Track Offset', 'Height Offset', 'Height',
        'SensorZenith', 'Range', 'SolarZenith', 'SolarAzimuth',
        'Land/SeaMask', 'WaterPresent', 'gflags', 'Scan number',
        'EV frames', 'Scan Type', 'EV start time', 'SD start time',
        'SV start time', 'EV center time', 'Mirror side',
        'SD Sun zenith', 'SD Sun azimuth', 'Moon Vector', 'orb_pos',
        'orb_vel', 'T_inst2ECR', 'attitude_angles', 'sun_ref',
        'impulse_enc', 'impulse_time', 'thermal_correction',
        'SensorAzimuth'
    ]

    b1 = []
    cm = np.zeros((2030, 1354), dtype=np.float32)
    lat = np.zeros((2030, 1354), dtype=np.float32)
    lon = np.zeros((2030, 1354), dtype=np.float32)

    # Read 'Cloud_Mask_1km' and decode the 2-bit cloud-mask flag
    # myd06 = Dataset(M06_file, "r")
    # CM = myd06.variables["Cloud_Mask_1km"][:,:,0]
    d06 = xr.open_dataset(
        M06_file, drop_variables="Scan Type")['Cloud_Mask_1km'][:, :, 0].values
    ds06_decoded = (np.array(d06, dtype='byte') & 0b00000110) >> 1
    CM = np.array(ds06_decoded).byteswap().newbyteorder()
    cm = da.concatenate((cm, CM), axis=0)
    cm = da.ravel(cm)

    # myd03 = Dataset(M03_file, "r")
    latitude = xr.open_dataset(
        M03_file, drop_variables=var_list)['Latitude'][:, :].values
    longitude = xr.open_dataset(
        M03_file, drop_variables=var_list)['Longitude'][:, :].values
    lat = da.concatenate((lat, latitude), axis=0)
    lon = da.concatenate((lon, longitude), axis=0)
    lat = da.ravel(lat)
    lon = da.ravel(lon)

    cm = cm.astype(int)
    lon = lon.astype(int)
    lat = lat.astype(int)
    lat = lat + 90
    lon = lon + 180

    Lat = lat.to_dask_dataframe()
    Lon = lon.to_dask_dataframe()
    CM = cm.to_dask_dataframe()
    df = dd.concat([Lat, Lon, CM], axis=1, interleave_partitions=False)
    cols = {0: 'Latitude', 1: 'Longitude', 2: 'CM'}
    df = df.rename(columns=cols)

    df2 = (df.groupby(['Longitude', 'Latitude']).CM
             .apply(countzero).reset_index())
    print(df2)
    b1.append(df2)
    print("printing b1:")
    print(b1)
    return b1
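# The aggregation step above groups raveled pixels by integer lat/lon bin and
# applies a per-cell reduction.  A self-contained sketch with made-up data and
# a stand-in lambda (countzero itself is defined elsewhere in this project):
import numpy as np
import pandas as pd
import dask.dataframe as dd

pdf = pd.DataFrame({'Latitude': [10, 10, 11],
                    'Longitude': [20, 20, 21],
                    'CM': [0, 1, 0]})
df = dd.from_pandas(pdf, npartitions=1)
out = (df.groupby(['Longitude', 'Latitude']).CM
         .apply(lambda s: (s == 0).sum(), meta=('CM', 'int64'))
         .reset_index())
print(out.compute())  # one row per (Longitude, Latitude) cell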
def get_sample_from_bil_info(self, data, fill_value=np.nan,
                             output_shape=None):
    """Resample the given data using the pre-calculated bilinear parameters."""
    if fill_value is None:
        fill_value = np.nan
    # FIXME: can this be made into a dask construct?
    cols, lines = np.meshgrid(np.arange(data['x'].size),
                              np.arange(data['y'].size))
    cols = da.ravel(cols)
    lines = da.ravel(lines)
    try:
        self.valid_input_index = self.valid_input_index.compute()
    except AttributeError:
        pass
    vii = self.valid_input_index.squeeze()
    try:
        self.index_array = self.index_array.compute()
    except AttributeError:
        pass

    # ia contains reduced (valid) indices of the source array, and has the
    # shape of the destination array
    ia = self.index_array
    rlines = lines[vii][ia]
    rcols = cols[vii][ia]

    slices = []
    mask_slices = []
    mask_2d_added = False
    coords = {}
    try:
        # FIXME: Use same chunk size as input data
        coord_x, coord_y = self.target_geo_def.get_proj_vectors_dask()
    except AttributeError:
        coord_x, coord_y = None, None

    for _, dim in enumerate(data.dims):
        if dim == 'y':
            slices.append(rlines)
            if not mask_2d_added:
                mask_slices.append(ia >= self.target_geo_def.size)
                mask_2d_added = True
            if coord_y is not None:
                coords[dim] = coord_y
        elif dim == 'x':
            slices.append(rcols)
            if not mask_2d_added:
                mask_slices.append(ia >= self.target_geo_def.size)
                mask_2d_added = True
            if coord_x is not None:
                coords[dim] = coord_x
        else:
            slices.append(slice(None))
            mask_slices.append(slice(None))
            try:
                coords[dim] = data.coords[dim]
            except KeyError:
                pass

    # NumPy requires a tuple (not a list) for multi-dimensional indexing
    res = data.values[tuple(slices)]
    res[tuple(mask_slices)] = fill_value

    try:
        p_1 = res[:, :, 0]
        p_2 = res[:, :, 1]
        p_3 = res[:, :, 2]
        p_4 = res[:, :, 3]
    except IndexError:
        p_1 = res[:, 0]
        p_2 = res[:, 1]
        p_3 = res[:, 2]
        p_4 = res[:, 3]

    s__, t__ = self.bilinear_s, self.bilinear_t

    res = (p_1 * (1 - s__) * (1 - t__) +
           p_2 * s__ * (1 - t__) +
           p_3 * (1 - s__) * t__ +
           p_4 * s__ * t__)

    epsilon = 1e-6
    data_min = da.nanmin(data) - epsilon
    data_max = da.nanmax(data) + epsilon

    idxs = (res > data_max) | (res < data_min)
    res = da.where(idxs, fill_value, res)
    shp = self.target_geo_def.shape
    if data.ndim == 3:
        res = da.reshape(res, (res.shape[0], shp[0], shp[1]))
    else:
        res = da.reshape(res, (shp[0], shp[1]))
    res = DataArray(da.from_array(res, chunks=CHUNK_SIZE),
                    dims=data.dims, coords=coords)

    return res