def __init_electric_field_storage(self, tax, xax):
    """
    Initialize electric field storage DataArrays and write them to file.

    :param tax: time axis
    :param xax: space axis
    :return:
    """
    electric_field_store = np.zeros((tax.size, xax.size))
    ef_DA = xr.DataArray(
        data=electric_field_store, coords=[("time", tax), ("space", xax)]
    )
    ef_DA.to_netcdf(self.efield_path, engine="h5netcdf", invalid_netcdf=True)
    self.efield_arr = xr.load_dataarray(self.efield_path, engine="h5netcdf")

    driver_electric_field_store = np.zeros((tax.size, xax.size))
    driver_ef_DA = xr.DataArray(
        data=driver_electric_field_store, coords=[("time", tax), ("space", xax)]
    )
    driver_ef_DA.to_netcdf(
        self.driver_efield_path, engine="h5netcdf", invalid_netcdf=True
    )
    self.driver_efield_arr = xr.load_dataarray(
        self.driver_efield_path, engine="h5netcdf"
    )
def __load_h5(self, wt):
    """
    Load the h5 file containing the coherence tensor for the sessions
    and monkey specified.
    """
    # Path to the file
    h5_super_tensor_path = os.path.join(self.__set_path(), self.coh_file)
    # Try to read the file in the path specified
    try:
        self.super_tensor = xr.load_dataarray(h5_super_tensor_path)
    except FileNotFoundError:
        raise OSError(f'File {self.coh_file} not found for monkey')

    # Copy axis values as class attributes
    self.time = self.super_tensor.times.values
    self.freqs = self.super_tensor.freqs.values

    # Copy metadata as class attributes
    self.session_info = {}
    self.session_info['nT'] = self.super_tensor.sizes["trials"]
    for key in self.super_tensor.attrs.keys():
        self.session_info[key] = self.super_tensor.attrs[key]

    # Crop beginning/end of the super-tensor to avoid edge effects
    if isinstance(wt, tuple):
        self.time = self.time[wt[0]:-wt[1]]
        self.super_tensor = self.super_tensor[..., wt[0]:-wt[1]]

    # Get Euclidean distances
    self.super_tensor.attrs['d_eu'] = self.__get_euclidean_distances()

    # Correct values below the significance level
    if self.coh_sig_file is not None:
        # Get the full path to the file
        sig_file_path = os.path.join(self.__set_path(), self.coh_sig_file)
        # Try to read the file in the path specified
        try:
            sig_values = xr.load_dataarray(sig_file_path).astype(_DEFAULT_TYPE)
        except FileNotFoundError:
            raise OSError(f'File {self.coh_sig_file} not found for monkey')
        # Should be an xarray with dimensions (n_roi, n_freqs, n_times)
        assert isinstance(sig_values, xr.DataArray)
        # Keep the attributes
        cfg = self.super_tensor.attrs
        # Remove values below the significance level
        # self.super_tensor.values = self.super_tensor \
        #     * (self.super_tensor >= sig_values).astype(_DEFAULT_TYPE)
        self.super_tensor.values = (
            self.super_tensor - sig_values).astype(_DEFAULT_TYPE)
        # Remove negative values
        self.super_tensor.values = np.clip(self.super_tensor.values, 0, np.inf)
        # Restore attributes
        self.super_tensor.attrs = cfg
def load_max(par_id):
    """Return the per-year maxima of the given parameter from the GRIB files in ``data/``."""
    done = {}
    interp = False
    for fn in os.listdir("data"):
        if not fn.endswith(".grb2"):
            continue
        _, pid, y, m = _grid_parameter_year_month_grb2(fn)
        if pid != par_id:
            continue
        if pid == "wind":
            wind = xr.load_dataset(f"data/{fn}", engine="cfgrib")
            vel = (wind.u**2 + wind.v**2)**0.5
            xarr = vel.max("step")
        else:
            xarr = xr.load_dataarray(f"data/{fn}", engine="cfgrib").max("step")
        if y in done:
            done[y].append(xarr)
        else:
            done[y] = [xarr]
    if len(done) != 0:
        interp = True
        ref = xarr
    for fn in os.listdir("data"):
        if not fn.endswith(".grb"):
            continue
        _, pid, y, m = _grid_parameter_year_month(fn)
        if pid != par_id:
            continue
        if pid == "wind":
            da = xr.load_dataarray(f"data/{fn}", engine="cfgrib")
            xarr = gen_dataset_from_raster(f"data/{fn}", da.longitude.values,
                                           da.latitude.values, da.time.values)
        else:
            xarr = xr.load_dataarray(f"data/{fn}", engine="cfgrib")
        # Compute the element-wise maximum over time
        if interp:
            xarr = xarr.interp_like(ref).max("time")
        else:
            xarr = xarr.max("time")
        if y in done:
            done[y].append(xarr)
        else:
            done[y] = [xarr]
    maxima = []
    for y in done:
        maximum = xr.full_like(done[y][0], np.nan)
        for arr in done[y]:
            maximum = np.fmax(maximum.values, arr)
        maxima.append(maximum)
    return maxima
def assemble_WRF_pwv(path=des_path, work_path=work_yuval, radius=1):
    from PW_stations import produce_geo_gnss_solved_stations
    import xarray as xr
    from aux_gps import save_ncfile
    from aux_gps import get_nearest_lat_lon_for_xy
    from aux_gps import get_unique_index
    df = produce_geo_gnss_solved_stations(path=work_path / 'gis', plot=False)
    dsea_point = df.loc['dsea'][['lat', 'lon']].astype(float).values
    if radius is not None:
        point = None
    else:
        point = dsea_point
    wrf_pw = read_all_WRF_GNSS_files(path, var='pw', point=point)
    wrf_pw8 = xr.load_dataarray(
        path / 'pw_wrfout_d04_2014-08-08_40lev.nc').sel(Time='2014-08-08')
    wrf_pw16 = xr.load_dataarray(
        path / 'pw_wrfout_d04_2014-08-16_40lev.nc').sel(Time='2014-08-16')
    wrf_pw_8_16 = xr.concat([wrf_pw8, wrf_pw16], 'Time')
    print('looking for {} at wrf.'.format(dsea_point))
    loc = get_nearest_lat_lon_for_xy(wrf_pw_8_16['XLAT'], wrf_pw_8_16['XLONG'],
                                     dsea_point)
    print(loc)
    if radius is not None:
        print('getting {} radius around {}.'.format(radius, dsea_point))
        lat_islice = [loc[0][0] - radius, loc[0][0] + radius + 1]
        lon_islice = [loc[0][1] - radius, loc[0][1] + radius + 1]
        wrf_pw_8_16 = wrf_pw_8_16.isel(south_north=slice(*lat_islice),
                                       west_east=slice(*lon_islice))
        loc = get_nearest_lat_lon_for_xy(wrf_pw['XLAT'], wrf_pw['XLONG'],
                                         dsea_point)
        lat_islice = [loc[0][0] - radius, loc[0][0] + radius + 1]
        lon_islice = [loc[0][1] - radius, loc[0][1] + radius + 1]
        wrf_pw = wrf_pw.isel(south_north=slice(*lat_islice),
                             west_east=slice(*lon_islice))
    else:
        wrf_pw_8_16 = wrf_pw_8_16.isel(south_north=loc[0][0],
                                       west_east=loc[0][1])
    wrf_pw = xr.concat([wrf_pw, wrf_pw_8_16], 'Time')
    wrf_pw = wrf_pw.rename({'Time': 'time'})
    wrf_pw = wrf_pw.sortby('time')
    wrf_pw = get_unique_index(wrf_pw)
    if wrf_pw.attrs['projection'] is not None:
        wrf_pw.attrs['projection'] = wrf_pw.attrs['projection'].proj4()
    if radius is not None:
        filename = 'pwv_wrf_dsea_gnss_radius_{}_2014-08.nc'.format(radius)
    else:
        filename = 'pwv_wrf_dsea_gnss_point_2014-08.nc'
    save_ncfile(wrf_pw, des_path, filename)
    return wrf_pw
def run_test(self, benchmark, file, expected):
    benchmark = benchmark_pool[benchmark]
    precomputed_features = Path(__file__).parent / file
    precomputed_features = BehavioralAssembly(
        xr.load_dataarray(precomputed_features))
    precomputed_features = precomputed_features.stack(
        presentation=['stimulus_path'])
    precomputed_paths = list(
        map(lambda f: Path(f).name,
            precomputed_features['stimulus_path'].values))
    # attach stimulus set meta
    stimulus_set = benchmark._assembly.stimulus_set
    expected_stimulus_paths = [
        stimulus_set.get_image(image_id)
        for image_id in stimulus_set['image_id']
    ]
    expected_stimulus_paths = list(
        map(lambda f: Path(f).name, expected_stimulus_paths))
    assert set(precomputed_paths) == set(expected_stimulus_paths)
    for column in stimulus_set.columns:
        precomputed_features[column] = 'presentation', stimulus_set[column].values
    precomputed_features = PrecomputedFeatures(
        precomputed_features,
        visual_degrees=10,  # doesn't matter, features are already computed
    )
    # score
    score = benchmark(precomputed_features).raw
    assert score.sel(aggregation='center') == expected
def test_xarray_readwrite(tmp_path):
    ex = get_example_explorer()
    xar = ex.get_variables_xarray(("Mass", "Speed"),
                                  isotope=["Pb187", "Pb186"],
                                  time_sec=np.linspace(0, 10, 100))
    tmp_file = tmp_path / 'caex_test.nc'
    xar.to_netcdf(tmp_file)
    rxar = xarray.load_dataarray(tmp_file)
    assert rxar.shape == (2, 2, 100)
    assert rxar.dims == ('varname', 'isotope', 'time_sec')
    # The coordinate order is not preserved on read, so rebuild it from dims.
    dims = {k: rxar.coords[k] for k in rxar.dims}
    vals = ex.map(rxar.sel, **dims)
    assert vals.shape == (2, 2, 100)
    fig = ex.plot_xarray(rxar)
    axes = fig.axes
    assert len(axes) == 2
    assert len(axes[0].lines) == 2
    assert len(axes[1].lines) == 2
    assert len(axes[1].lines[0].get_xdata()) == 100
    xvals = axes[1].lines[0].get_xdata()
    assert np.allclose(xvals, np.linspace(0, 10, 100))
def joint_distribution(par_ids, months, lon, lat):
    """Return the paired samples of the two parameters at the given location
    for the selected months, as a (2, n) array."""
    done1 = {}
    done2 = {}
    for fn in os.listdir("data"):
        if fn.endswith(".grb"):
            _, pid, y, m = _grid_parameter_year_month(fn)
        elif fn.endswith(".grb2"):
            _, pid, y, m = _grid_parameter_year_month_grb2(fn)
        else:
            continue
        if pid not in par_ids:
            continue
        if m not in months:
            continue
        logging.info("Reading %s", fn)
        x = xr.load_dataarray(f"data/{fn}", engine="cfgrib").sel(
            longitude=lon, latitude=lat, method="nearest").values
        if pid == par_ids[0]:
            done1[(y, m)] = x
        else:
            done2[(y, m)] = x
    xall = [[], []]
    for k in done1:
        xall[0].extend(done1[k])
        xall[1].extend(done2[k])
    return np.array(xall)
def __init_dist_func_storage(self, tax, xax, vax, f_storage_rules):
    """
    Initialize distribution function storage

    :param tax: time axis
    :param xax: space axis
    :param vax: velocity axis
    :param f_storage_rules: storage rule, e.g. "all-x"
    :return:
    """
    if f_storage_rules == "all-x":
        dist_func_store = np.zeros((tax.size, xax.size, vax.size))
        f_DA = xr.DataArray(
            data=dist_func_store,
            coords=[("time", tax), ("space", xax), ("velocity", vax)],
        )
    else:
        kax = np.linspace(0, 1, 2)
        dist_func_store = np.zeros((tax.size, kax.size, vax.size),
                                   dtype=np.complex128)
        f_DA = xr.DataArray(
            data=dist_func_store,
            coords=[("time", tax), ("fourier_mode", kax), ("velocity", vax)],
        )
    f_DA.to_netcdf(self.f_path, engine="h5netcdf", invalid_netcdf=True)
    self.f_arr = xr.load_dataarray(self.f_path, engine="h5netcdf")
def cache_read(filename):
    """read file in cache folder"""
    path = _path_cache / filename
    try:
        return xr.load_dataarray(path)
    except ValueError:
        return xr.load_dataset(path)
def weibull_data(par_id, months, lon, lat):
    """Return an array of the yearly maxima of the parameter at the given
    location over the selected months."""
    done = {}
    for fn in os.listdir("data"):
        if fn.endswith(".grb"):
            _, pid, y, m = _grid_parameter_year_month(fn)
        elif fn.endswith(".grb2"):
            _, pid, y, m = _grid_parameter_year_month_grb2(fn)
        else:
            continue
        if pid != par_id:
            continue
        if m not in months:
            continue
        logging.info("Reading %s", fn)
        x = xr.load_dataarray(f"data/{fn}", engine="cfgrib").sel(
            longitude=lon, latitude=lat, method="nearest").values
        if y in done:
            done[y].append(x.max())
        else:
            done[y] = [x.max()]
    xall = []
    for y in done:
        xall.append(max(done[y]))
    return np.array(xall)
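# Hypothetical usage sketch for weibull_data, not part of the original module:
# the yearly maxima it returns are natural input for an extreme-value fit.
# The parameter id, months, coordinates, and the scipy Weibull fit below are
# illustrative assumptions.
from scipy.stats import weibull_min

maxima = weibull_data("hs", months=[12, 1, 2], lon=4.0, lat=52.5)
# Fit a Weibull distribution to the yearly maxima, with the location fixed at 0.
shape, loc, scale = weibull_min.fit(maxima, floc=0)
print(f"shape={shape:.2f}, scale={scale:.2f}")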
def from_file(cls, filename):
    """
    Load signal from saved file.

    :param filename: filename for the Signal
    :type filename: str
    """
    if not filename.endswith(NC_EXT):
        filename += NC_EXT
    # load NC file
    xarray = xr.load_dataarray(filename)
    # init class
    signal = cls(xarray)
    # if nc file has attributes, copy them to signal class
    if xarray.attrs:
        process_steps = []
        for k, v in xarray.attrs.items():
            if cls.PROCESS_STEPS_KEY in k:
                idx = int(k[len(cls.PROCESS_STEPS_KEY) + 1:])
                process_steps.insert(idx, v)
            else:
                setattr(signal, k, v)
    else:
        logging.warning("No metadata found, setting empty...")
        process_steps = [
            f"raw {signal.signal_type} signal: {signal.start_time}--"
            f"{signal.end_time}s"
        ]
    setattr(signal, cls.PROCESS_STEPS_KEY, process_steps)
    return signal
def mme_dataset(paths, to_celsius=False):
    base = xr.open_dataarray(paths[0])
    base_coords = base.coords
    n = len(paths)
    s = np.zeros_like(base)
    for i, p in enumerate(paths):
        print(i + 1, n, p.name)
        da = xr.load_dataarray(p)
        if to_celsius:
            s += da.values - 273.15
        else:
            s += da.values
        da.close()
    s /= n
    base.close()
    mme = xr.DataArray(
        s,
        {
            'time': base_coords['time'].values,
            'lat': base_coords['lat'].values,
            'lon': base_coords['lon'].values
        },
        dims=['time', 'lat', 'lon'],
        # attrs=t_attr,
        name='pr')
    return mme
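# Hypothetical usage sketch for mme_dataset, not part of the original code:
# average a set of single-model files into one multi-model ensemble mean.
# The directory and glob pattern are assumptions.
from pathlib import Path

model_files = sorted(Path("models").glob("pr_*.nc"))
mme = mme_dataset(model_files)
mme.to_netcdf("pr_mme.nc")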
def merge(self, data, step, priority=None):
    ds = xr.load_dataarray(self.filename)
    if step in ds.step:
        ds2 = self.merge_step(data, ds, priority)
    else:
        ds2 = xr.concat([ds, data], dim='step')
    return ds2
def read_backgrounds(self, path, prefix='mix.', suffix='.nc', step='apos',
                     var='CO2.bg', field='background'):
    for site in self.sites.itertuples():
        ds = xr.load_dataarray(
            os.path.join(path, f'{prefix}{site.code}{suffix}'))

        # Select the appropriate level
        level = site.height
        if level not in ds.level:
            if len(ds.level) == 1:
                level = -1  # take the only level available
                logger.warning(
                    f"Use level {level} for site {site.code} with sampling height of {site.height} m"
                )
            else:
                import pdb
                pdb.set_trace()

        # Load the data
        bg = ds.sel(step=step, var=var, level=level)

        # Temporal interpolation
        bg_interp = interp(
            self.observations.loc[self.observations.site == site.code, 'time'],
            bg.time, bg)
        self.observations.loc[self.observations.site == site.code,
                              field] = bg_interp
def data_extract(lon, lat):
    files = {
        pid: open(f"tmp/data-{pid}-{lon}-{lat}.csv", 'x')
        for pid in ["hs", "tp", "dp"]
    }
    for fn in os.listdir("data"):
        if fn.endswith(".grb"):
            _, pid, y, m = _grid_parameter_year_month(fn)
        elif fn.endswith(".grb2"):
            _, pid, y, m = _grid_parameter_year_month_grb2(fn)
        else:
            continue
        if pid in files:
            logging.info("Reading %s", fn)
            da = xr.load_dataarray(f"data/{fn}", engine="cfgrib").sel(
                longitude=lon, latitude=lat, method="nearest")
            df = da.to_dataframe()
            if df.index.name != "time":
                df.time += df.index
                df.set_index('time', inplace=True)
            df[df.columns[-1]][1:].to_csv(files[pid], header=False,
                                          line_terminator='\n')
    for pid in files:
        files[pid].close()
def run_LR(path=mri_path, annual=False, times=['2000', None], detrend=False):
    from sklearn.linear_model import LinearRegression
    import xarray as xr
    from aux_functions_strat import anomalize_xr
    # load h2o:
    h2o = xr.load_dataarray(path / 'vmrh2o_equatorial_8000_Pa_1960-2099.nc')
    h2o = h2o.sel(time=slice(*times))
    # convert to ppmv:
    h2o *= 1e6
    h2o -= h2o.sel(time=slice('2000', '2009')).mean('time')
    # h2o /= h2o.sel(time=slice('2000', '2009')).std('time')
    # load t500:
    t500 = xr.load_dataarray(path / 'ta_equatorial_50000_Pa_1960-2099.nc')
    t500 = t500.sel(time=slice(*times))
    t500 -= t500.sel(time=slice('2000', '2009')).mean('time')
    # t500 /= t500.sel(time=slice('2000', '2009')).std('time')
    # load u50:
    u50 = xr.load_dataarray(path / 'ua_equatorial_5000_Pa_1960-2099.nc')
    # now produce qbo:
    u50 = u50.sel(time=slice(*times))
    u50 -= u50.sel(time=slice('2000', '2009')).mean('time')
    # u50 /= u50.sel(time=slice('2000', '2009')).std('time')
    # qbo = anomalize_xr(u50, 'MS', units='std')
    # detrend h2o and t500:
    if detrend:
        t500 = detrend_ts(t500)
        h2o = detrend_ts(h2o)
    # produce X, y:
    # y = anomalize_xr(h2o, 'MS', units='std')
    y = h2o
    X = xr.merge([t500, u50]).to_array('X')
    X = X.transpose('time', ...)
    if annual:
        y = y.resample(time='AS').mean()
        X = X.resample(time='AS').mean()
    # linear regression:
    lr = LinearRegression()
    lr.fit(X, y)
    pred = xr.DataArray(lr.predict(X), dims=['time'])
    pred['time'] = y['time']
    print(lr.coef_)
    print(lr.score(X, y))
    y.plot(color='b')
    pred.plot(color='r')
    df = X.to_dataset('X').to_dataframe()
    df['h2o'] = y.to_dataframe()
    return lr, df
def test_exact_activations(pca_components):
    activations = test_from_image_path(model_ctr=pytorch_alexnet_resize,
                                       layers=['features.12', 'classifier.5'],
                                       image_name='rgb.jpg',
                                       pca_components=pca_components,
                                       logits=False)
    path_to_expected = Path(
        __file__).parent / f'alexnet-rgb-{pca_components}.nc'
    expected = xr.load_dataarray(path_to_expected)
    assert (activations == expected).all()
def test_Kar2019ost_cornet_s(self):
    benchmark = benchmark_pool['dicarlo.Kar2019-ost']
    precomputed_features = Path(__file__).parent / 'cornet_s-kar2019.nc'
    precomputed_features = BehavioralAssembly(
        xr.load_dataarray(precomputed_features))
    precomputed_features = PrecomputedFeatures(precomputed_features,
                                               visual_degrees=8)
    # score
    score = benchmark(precomputed_features).raw
    assert score.sel(aggregation='center') == approx(.316, abs=.005)
def test_model(self, model, expected_score):
    # assemblies
    objectome = load_assembly()
    probabilities = Path(__file__).parent / f'{model}-probabilities.nc'
    probabilities = BehavioralAssembly(xr.load_dataarray(probabilities))
    # metric
    i2n = I2n()
    score = i2n(probabilities, objectome)
    score = score.sel(aggregation='center')
    assert score == approx(expected_score, abs=0.005), \
        f"expected {expected_score}, but got {score}"
def download_all_10mins_ims(savepath, channel_name='TD'):
    """Download all 10-minute stations for the specified channel; updating
    existing fields is automatic."""
    from aux_gps import path_glob
    import xarray as xr
    import logging
    logger = logging.getLogger('ims_downloader')
    glob = '*_{}_10mins.nc'.format(channel_name)
    files = sorted(path_glob(savepath, glob, return_empty_list=True))
    files = [x for x in files if x.is_file()]
    if files:
        time_dim = list(set(xr.open_dataarray(files[0]).dims))[0]
    last_dates = [check_ds_last_datetime(xr.open_dataarray(x)) for x in files]
    st_id_downloaded = [
        int(x.as_posix().split('/')[-1].split('_')[1]) for x in files
    ]
    d = dict(zip(st_id_downloaded, last_dates))
    stations = ims_api_get_meta(active_only=True, channel_name=channel_name)
    for index, row in stations.iterrows():
        st_id = row['stationId']
        if st_id not in d.keys():
            download_ims_single_station(savepath=savepath,
                                        channel_name=channel_name,
                                        stationid=st_id, update=None)
        elif st_id in d.keys():
            logger.info('updating station {}...'.format(st_id))
            da = download_ims_single_station(savepath=savepath,
                                             channel_name=channel_name,
                                             stationid=st_id,
                                             update=d[st_id])
            if da is not None:
                file = path_glob(
                    savepath,
                    '*_{}_{}_10mins.nc'.format(st_id, channel_name))[0]
                da_old = xr.load_dataarray(file)
                da = xr.concat([da, da_old], time_dim)
                filename = '_'.join([
                    '-'.join(row['name'].split(' ')),
                    str(st_id), channel_name, '10mins'
                ]) + '.nc'
                comp = dict(zlib=True, complevel=9)  # best compression
                encoding = {var: comp for var in da.to_dataset().data_vars}
                logger.info('saving {} to {}'.format(filename, savepath))
                try:
                    da.to_netcdf(savepath / filename, 'w', encoding=encoding)
                except PermissionError:
                    (savepath / filename).unlink()
                    da.to_netcdf(savepath / filename, 'w', encoding=encoding)
                # print('done!')
        else:
            logger.warning('station {} is already in {}, skipping...'.format(
                st_id, savepath))
    return
def cache_read(filename, verbose=False):
    """read file in cache folder"""
    path = _path_cache / filename
    try:
        result = xr.load_dataarray(path)
    except ValueError:
        result = xr.load_dataset(path)
    if verbose:
        print(f"Return stored {result.__class__.__name__} from {path}")
    return result
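# Hypothetical usage sketch for cache_read, not part of the original code;
# the filename is an assumption. Because xr.load_dataarray raises ValueError
# when a file contains more than one data variable, cache_read transparently
# falls back to returning a Dataset in that case.
result = cache_read("era5_temperature.nc", verbose=True)
if isinstance(result, xr.Dataset):
    da = result[list(result.data_vars)[0]]
else:
    da = result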
def check_rotor_swept_area():
    is_built = xr.open_dataarray(OUTPUT_DIR / "turbine-time-series" / "is_built.nc")
    num_turbines = is_built.sum(dim="turbines")
    rotor_swept_area = xr.load_dataarray(
        OUTPUT_DIR / "turbine-time-series" / "rotor_swept_area.nc"
    )

    min_rotor_diameter = 10
    max_rotor_diameter = 180

    avg_rotor_swept_area = rotor_swept_area / num_turbines

    assert np.all(min_rotor_diameter ** 2 / 4 * np.pi < avg_rotor_swept_area) & np.all(
        avg_rotor_swept_area < max_rotor_diameter ** 2 / 4 * np.pi
    ), "implausible average rotor diameter"
def test_precomputed(self, model, expected_score):
    benchmark = DicarloRajalingham2018I2n()
    probabilities = Path(
        __file__).parent.parent / 'test_metrics' / f'{model}-probabilities.nc'
    probabilities = BehavioralAssembly(xr.load_dataarray(probabilities))
    candidate = PrecomputedProbabilities(probabilities)
    score = benchmark(candidate)
    assert score.raw.sel(aggregation='center') == approx(expected_score, abs=.005)
    assert score.sel(aggregation='center') == approx(
        expected_score / np.sqrt(.479), abs=.005)
def test_Rajalingham2018public(self):
    benchmark = benchmark_pool['dicarlo.Rajalingham2018public-i2n']
    # load features
    precomputed_features = Path(
        __file__).parent / 'CORnetZ-rajalingham2018public.nc'
    precomputed_features = BehavioralAssembly(
        xr.load_dataarray(precomputed_features))
    precomputed_features = PrecomputedFeatures(
        precomputed_features,
        visual_degrees=8,  # doesn't matter, features are already computed
    )
    # score
    score = benchmark(precomputed_features).raw
    assert score.sel(aggregation='center') == approx(.136923, abs=.005)
def calc_correlation_efficiency_vs_input_power_density():
    rotor_swept_area = xr.load_dataarray(OUTPUT_DIR / "turbine-time-series" /
                                         "rotor_swept_area.nc")
    p_in = xr.load_dataarray(OUTPUT_DIR / "power_in_wind" / "p_in_monthly.nc")
    p_in = p_in.sortby("time")

    p_out = load_generated_energy_gwh()
    p_out = p_out / p_out.time.dt.days_in_month / 24
    p_out = p_out.sortby("time")

    p_in = filter2010(p_in)
    p_out = filter2010(p_out)
    rotor_swept_area = filter2010(rotor_swept_area)

    efficiency = p_out / p_in
    p_in_density = p_in / rotor_swept_area * 1e9

    correlation = np.corrcoef(p_in_density, efficiency)[0, 1]

    write_data_value(
        "correlation-efficiency-vs-input-power-density",
        f"{correlation:.3f}",
    )
def batched_write_to_file(self):
    """
    Write the batched history to file.

    Keeping some of the history on the accelerator rather than passing it
    back every time step saves time.
    """
    t_xr = xr.DataArray(data=self.temp_t_store, dims=["time"])
    self.efield_arr.loc[t_xr, :] = self.temp_field_store
    self.driver_efield_arr.loc[t_xr, :] = self.temp_driver_store

    # Save and reopen
    self.efield_arr.to_netcdf(
        self.efield_path, engine="h5netcdf", invalid_netcdf=True
    )
    self.driver_efield_arr.to_netcdf(
        self.driver_efield_path, engine="h5netcdf", invalid_netcdf=True
    )
    self.efield_arr = xr.load_dataarray(self.efield_path, engine="h5netcdf")
    self.driver_efield_arr = xr.load_dataarray(
        self.driver_efield_path, engine="h5netcdf"
    )

    if self.store_f is not None:
        self.f_arr.loc[t_xr, ] = self.temp_dist_store
        self.f_arr.to_netcdf(self.f_path, engine="h5netcdf", invalid_netcdf=True)
        self.f_arr = xr.load_dataarray(self.f_path, engine="h5netcdf")
def open_and_format(paths: list):
    n_paths = len(paths)
    for i, path in enumerate(paths):
        print(i + 1, n_paths, path.name)
        da = xr.load_dataarray(path).resample(time='MS').sum()
        da_m = da.where(obs_land.notnull())
        time_var = path.parent.parent.name
        folder_name = path.name.split('_')[4]
        for year in range(2015, 2100, 17):
            r = ut.select_year(da_m, year, year + 16)
            out_path = ut.save_file(
                out / time_var / folder_name /
                f'{folder_name}_{time_var}_{year}-{year+16}.csv')
            print('\tsaving', out_path, '\r', flush=True, end='')
            format_cdbc(r).to_csv(out_path, index=False, header=False)
    print()
def test_model(self, model, expected_score):
    class UnceiledBenchmark(DicarloRajalingham2018I2n):
        def __call__(self, candidate: BrainModel):
            candidate.start_task(BrainModel.Task.probabilities,
                                 self._fitting_stimuli)
            probabilities = candidate.look_at(self._assembly.stimulus_set)
            score = self._metric(probabilities, self._assembly)
            return score

    benchmark = UnceiledBenchmark()
    # features
    path_to_expected = Path(
        __file__).parent / f'identifier={model},stimuli_identifier=objectome-240.nc'
    feature_responses = xr.load_dataarray(path_to_expected)
    feature_responses['image_id'] = 'stimulus_path', [
        os.path.splitext(os.path.basename(path))[0]
        for path in feature_responses['stimulus_path'].values
    ]
    feature_responses = feature_responses.stack(
        presentation=['stimulus_path'])
    assert len(np.unique(
        feature_responses['layer'])) == 1  # only penultimate layer

    class PrecomputedFeatures:
        def __init__(self, precomputed_features):
            self.features = precomputed_features

        def __call__(self, stimuli, layers):
            np.testing.assert_array_equal(layers, ['behavioral-layer'])
            self_image_ids = self.features['image_id'].values.tolist()
            indices = [
                self_image_ids.index(image_id)
                for image_id in stimuli['image_id'].values
            ]
            features = self.features[{'presentation': indices}]
            return features

    # evaluate candidate
    transformation = ProbabilitiesMapping(
        identifier=f'TestI2N.{model}',
        activations_model=PrecomputedFeatures(feature_responses),
        layer='behavioral-layer')
    score = benchmark(transformation)
    score = score.sel(aggregation='center')
    assert score == approx(
        expected_score, abs=0.005), f"expected {expected_score}, but got {score}"
def combine_grib_vars_to_xr(grib_dir: Path, output_dir: Path):
    fname = grib_dir.parts[-1] + '.nc'
    xr_fpath = output_dir / fname
    if xr_fpath.exists():
        raise ValueError(f"{xr_fpath} already exists.")
    ds_dict = {}
    for f in grib_dir.iterdir():
        if f.suffix == '.grib':
            try:
                var_name = f.name.split('.')[0]
                ds_dict[var_name] = xr.load_dataarray(
                    f, engine='cfgrib').rename(var_name)
            except Exception:
                print("DatasetBuildError on ", f)
    # additional loading just to load the attributes information
    ds = xr.Dataset(ds_dict)
    ds.to_netcdf(xr_fpath)
def load_profile_data(path: Union[Path, str]) -> xr.DataArray:
    """Load the data at the given path into an xr.DataArray.

    Right now, this is just a wrapper around `xr.load_dataarray`, but I made
    it so we'd be using a standardized IO interface for the data.

    Parameters
    ----------
    path : Union[Path, str]
        path to the profile data

    Returns
    -------
    xr.DataArray
        the data
    """
    data = xr.load_dataarray(path)
    logging.info("Loaded data from %s", path)
    return data
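# Hypothetical usage sketch for load_profile_data, not part of the original
# code: the file name and the "time" dimension are assumptions.
profile = load_profile_data("profiles/run_001.nc")
print(profile.dims, profile.shape)
mean_profile = profile.mean(dim="time")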