def wrapyear(data, data_prev, data_next, daymin, daymax, year=None):
    """Wrap daily data from previous and next years for extended day ranges.
    """
    daynm = atm.get_coord(data, 'day', 'name')

    def leap_adjust(data, year):
        data = atm.squeeze(data)
        ndays = 365
        if year is not None and atm.isleap(year):
            ndays += 1
        else:
            # Remove NaN for day 366 in non-leap year
            data = atm.subset(data, {'day' : (1, ndays)})
        return data, ndays

    data, ndays = leap_adjust(data, year)
    if data_prev is not None:
        data_prev, ndays_prev = leap_adjust(data_prev, year - 1)
        data_prev[daynm] = data_prev[daynm] - ndays_prev
        data_out = xray.concat([data_prev, data], dim=daynm)
    else:
        data_out = data
    if data_next is not None:
        data_next, _ = leap_adjust(data_next, year + 1)
        data_next[daynm] = data_next[daynm] + ndays
        data_out = xray.concat([data_out, data_next], dim=daynm)
    data_out = atm.subset(data_out, {daynm : (daymin, daymax)})
    return data_out
def test_concat_size0(self):
    data = create_test_data()
    split_data = [data.isel(dim1=slice(0, 0)), data]
    actual = concat(split_data, 'dim1')
    self.assertDatasetIdentical(data, actual)

    actual = concat(split_data[::-1], 'dim1')
    self.assertDatasetIdentical(data, actual)
def test_concat(self):
    # TODO: simplify and split this test case

    # drop the third dimension to keep things relatively understandable
    data = create_test_data().drop('dim3')

    split_data = [data.isel(dim1=slice(3)),
                  data.isel(dim1=slice(3, None))]
    self.assertDatasetIdentical(data, concat(split_data, 'dim1'))

    def rectify_dim_order(dataset):
        # return a new dataset with all variable dimensions transposed into
        # the order in which they are found in `data`
        return Dataset(dict((k, v.transpose(*data[k].dims))
                            for k, v in iteritems(dataset.data_vars)),
                       dataset.coords, attrs=dataset.attrs)

    for dim in ['dim1', 'dim2']:
        datasets = [g for _, g in data.groupby(dim, squeeze=False)]
        self.assertDatasetIdentical(data, concat(datasets, dim))
        self.assertDatasetIdentical(data, concat(datasets, data[dim]))
        self.assertDatasetIdentical(
            data, concat(datasets, data[dim], coords='minimal'))

        datasets = [g for _, g in data.groupby(dim, squeeze=True)]
        concat_over = [k for k, v in iteritems(data.coords)
                       if dim in v.dims and k != dim]
        actual = concat(datasets, data[dim], coords=concat_over)
        self.assertDatasetIdentical(data, rectify_dim_order(actual))

        actual = concat(datasets, data[dim], coords='different')
        self.assertDatasetIdentical(data, rectify_dim_order(actual))

    # make sure the coords argument behaves as expected
    data.coords['extra'] = ('dim4', np.arange(3))
    for dim in ['dim1', 'dim2']:
        datasets = [g for _, g in data.groupby(dim, squeeze=True)]
        actual = concat(datasets, data[dim], coords='all')
        expected = np.array([data['extra'].values
                             for _ in range(data.dims[dim])])
        self.assertArrayEqual(actual['extra'].values, expected)

        actual = concat(datasets, data[dim], coords='different')
        self.assertDataArrayEqual(data['extra'], actual['extra'])
        actual = concat(datasets, data[dim], coords='minimal')
        self.assertDataArrayEqual(data['extra'], actual['extra'])

    # verify that the dim argument takes precedence over
    # concatenating dataset variables of the same name
    dim = (2 * data['dim1']).rename('dim1')
    datasets = [g for _, g in data.groupby('dim1', squeeze=False)]
    expected = data.copy()
    expected['dim1'] = dim
    self.assertDatasetIdentical(expected, concat(datasets, dim))
def test_concat_constant_index(self):
    # GH425
    ds1 = Dataset({'foo': 1.5}, {'y': 1})
    ds2 = Dataset({'foo': 2.5}, {'y': 1})
    expected = Dataset({'foo': ('y', [1.5, 2.5]), 'y': [1, 1]})
    for mode in ['different', 'all', ['foo']]:
        actual = concat([ds1, ds2], 'y', data_vars=mode)
        self.assertDatasetIdentical(expected, actual)
    with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
        concat([ds1, ds2], 'y', data_vars='minimal')
def test_concat(self):
    ds = Dataset({'foo': (['x', 'y'], np.random.random((10, 20))),
                  'bar': (['x', 'y'], np.random.random((10, 20)))})
    foo = ds['foo']
    bar = ds['bar']

    # from dataset array:
    expected = DataArray(np.array([foo.values, bar.values]),
                         dims=['w', 'x', 'y'])
    actual = concat([foo, bar], 'w')
    self.assertDataArrayEqual(expected, actual)

    # from iteration:
    grouped = [g for _, g in foo.groupby('x')]
    stacked = concat(grouped, ds['x'])
    self.assertDataArrayIdentical(foo, stacked)

    # with an index as the 'dim' argument
    stacked = concat(grouped, ds.indexes['x'])
    self.assertDataArrayIdentical(foo, stacked)

    actual = concat([foo[0], foo[1]], pd.Index([0, 1])).reset_coords(drop=True)
    expected = foo[:2].rename({'x': 'concat_dim'})
    self.assertDataArrayIdentical(expected, actual)

    actual = concat([foo[0], foo[1]], [0, 1]).reset_coords(drop=True)
    expected = foo[:2].rename({'x': 'concat_dim'})
    self.assertDataArrayIdentical(expected, actual)

    with self.assertRaisesRegexp(ValueError, 'not identical'):
        concat([foo, bar], dim='w', compat='identical')

    with self.assertRaisesRegexp(ValueError, 'not a valid argument'):
        concat([foo, bar], dim='w', data_vars='minimal')
def test_concat_coords(self):
    data = Dataset({'foo': ('x', np.random.randn(10))})
    expected = data.assign_coords(c=('x', [0] * 5 + [1] * 5))
    objs = [data.isel(x=slice(5)).assign_coords(c=0),
            data.isel(x=slice(5, None)).assign_coords(c=1)]
    for coords in ['different', 'all', ['c']]:
        actual = concat(objs, dim='x', coords=coords)
        self.assertDatasetIdentical(expected, actual)
    for coords in ['minimal', []]:
        with self.assertRaisesRegexp(ValueError, 'not equal across'):
            concat(objs, dim='x', coords=coords)
def test_concat_do_not_promote(self):
    # GH438
    objs = [Dataset({'y': ('t', [1])}, {'x': 1}),
            Dataset({'y': ('t', [2])}, {'x': 1})]
    expected = Dataset({'y': ('t', [1, 2])}, {'x': 1, 't': [0, 0]})
    actual = concat(objs, 't')
    self.assertDatasetIdentical(expected, actual)

    objs = [Dataset({'y': ('t', [1])}, {'x': 1}),
            Dataset({'y': ('t', [2])}, {'x': 2})]
    with self.assertRaises(ValueError):
        concat(objs, 't', coords='minimal')
def cen_diff(cls, arr, dim, spacing=1, is_coord=False,
             do_edges_one_sided=False):
    """Centered differencing of the DataArray or Dataset.

    :param arr: Data to be center-differenced.
    :type arr: `xray.DataArray` or `xray.Dataset`
    :param str dim: Dimension over which to perform the differencing.
    :param int spacing: How many gridpoints over to use.  Size of resulting
                        array depends on this value.
    :param do_edges_one_sided: Whether or not to fill in the edge cells
                               that don't have the needed neighbor cells for
                               the stencil.  If `True`, use one-sided
                               differencing with the same order of accuracy
                               as `order`, and the outputted array is the
                               same shape as `arr`.  If `False`, the
                               outputted array has a length in the computed
                               axis reduced by `order`.
    """
    if spacing < 1:
        raise ValueError("Centered differencing cannot have spacing < 1")
    left = arr.isel(**{dim: slice(0, -spacing)})
    right = arr.isel(**{dim: slice(spacing, None)})
    # Centered differencing = sum of intermediate forward differences
    diff = (cls.fwd_diff1(right, dim, is_coord=is_coord) +
            cls.bwd_diff1(left, dim, is_coord=is_coord))
    if do_edges_one_sided:
        left = arr.isel(**{dim: slice(0, 2)})
        right = arr.isel(**{dim: slice(-2, None)})
        diff_left = cls.fwd_diff1(left, dim, is_coord=is_coord)
        diff_right = cls.bwd_diff1(right, dim, is_coord=is_coord)
        diff = xray.concat([diff_left, diff, diff_right], dim=dim)
    return diff
def wrapyear_all(data, daymin, daymax):
    """Wrap daily data to extended ranges over each year in yearly data."""

    def extract_year(data, year, years):
        if year in years:
            data_out = atm.subset(data, {'year' : (year, year)})
        else:
            data_out = None
        return data_out

    daynm = atm.get_coord(data, 'day', 'name')
    days = np.arange(daymin, daymax + 1)
    days = xray.DataArray(days, name=daynm, coords={daynm : days})
    years = atm.get_coord(data, 'year')
    yearnm = atm.get_coord(data, 'year', 'name')
    for y, year in enumerate(years):
        year_prev, year_next = year - 1, year + 1
        var = extract_year(data, year, years)
        var_prev = extract_year(data, year_prev, years)
        var_next = extract_year(data, year_next, years)
        var_out = wrapyear(var, var_prev, var_next, daymin, daymax, year)
        var_out = atm.expand_dims(var_out, 'year', year, axis=0)
        var_out = var_out.reindex_like(days)
        if y == 0:
            data_out = var_out
        else:
            data_out = xray.concat([data_out, var_out], dim=yearnm)
    return data_out
def fetch_full_san_data(stream_key, time_range, location_metadata=None):
    """
    Given a time range and stream key, generate all data in the interval
    using data from the SAN.
    :param stream_key:
    :param time_range:
    :return:
    """
    if location_metadata is None:
        location_metadata = get_san_location_metadata(stream_key, time_range)
    # get which bins we can gather data from
    ref_des_dir, dir_string = get_SAN_directories(stream_key, split=True)
    if not os.path.exists(ref_des_dir):
        log.warning("Reference Designator does not exist in offloaded DataSAN")
        return None
    data = []
    next_index = 0
    for time_bin in location_metadata.bin_list:
        direct = dir_string.format(time_bin)
        if os.path.exists(direct):
            # get data from all of the deployments
            deployments = os.listdir(direct)
            for deployment in deployments:
                full_path = os.path.join(direct, deployment)
                if os.path.isdir(full_path):
                    new_data = get_deployment_data(full_path, stream_key.stream_name,
                                                   -1, time_range, index_start=next_index)
                    if new_data is not None:
                        data.append(new_data)
                        # Keep track of indexes so they are unique in the final dataset
                        next_index += len(new_data['index'])
    if len(data) == 0:
        return None
    return xray.concat(data, dim='index')
def concat_to_nc(filelist, output_filename, dim='time', begin_time=None,
                 end_time=None, nc_format='NETCDF4', **kwargs):
    '''
    take list of consecutive netcdf files (made for CMIP5 data) and stack
    them into a single larger netcdf file.  This was necessary to overcome
    some bugginess in how MFDataset is dealing with different calendar units
    on different files.  This is technically valid CF-Compliant metadata, but
    is tricky to work with.  This hack allows us to get around some of this
    unpredictable behavior.

    PARAMETERS:
    -----------
    filelist = [list] list of string file paths to the sorted netcdf files
        to stack together
    output_filename = [str] path to and name of the output file to be
        generated (.nc extension)
    dim = [str] dimension to stack on -- default is 'time'
    begin_time = [str] PANDAS style datetime string syntax -- used in xray
    end_time = [str] PANDAS style datetime string syntax -- used in xray
    format = [str] output NetCDF format desired. valid strings are:
        'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'
        default is 'NETCDF4'
    **kwargs -- potential future arguments or overloaded args to pass
        through (none implemented)

    RETURNS:
    --------
    output_filename as string, with the important side-effect of writing
    data to disk
    '''
    import xray
    with xray.concat([xray.open_dataset(i).load() for i in filelist], dim) as ds:
        # time slicer condition
        if begin_time != None and end_time != None:
            ds = ds.loc[{dim: slice(begin_time, end_time)}]
        if os.path.exists(output_filename):
            os.remove(output_filename)
        ds.to_netcdf(output_filename, mode='w', format=nc_format)
    return output_filename
def compile_datasets(datasets):
    """
    Given a list of datasets, possibly containing None, return a single
    dataset with unique indexes, sorted by the 'time' parameter.
    :param datasets:
    :return:
    """
    # filter out the Nones
    datasets = filter(None, datasets)
    if len(datasets) == 0:
        return None
    datasets.sort(key=lambda val: val['time'].values[0])
    # now determine if they are in order or not..
    start = 0
    end = 0
    idx = 0
    for ds in datasets:
        ns = ds['time'].min()
        ne = ds['time'].max()
        # Determine if the max and the min are all in order
        start = ns
        end = ne
        new_index = [i for i in range(idx, idx + len(ds['index']))]
        ds['index'] = new_index
        idx = new_index[-1] + 1
    dataset = xray.concat(datasets, dim='index')
    sorted_idx = dataset.time.argsort()
    dataset = dataset.reindex({'index': sorted_idx})
    return dataset
def read_data(data_dir, lat, lon, resample=None):
    files = sorted([os.path.join(data_dir, f) for f in os.listdir(data_dir)])
    dss = [xr.open_dataset(f).sel(lat=lat, lon=lon, method='nearest')
           for f in files]
    ds = xr.concat([dr.load() for dr in dss], 'time')
    if resample is not None:
        ds = ds.resample(resample, 'time')
    return ds
def test_concat(self):
    self.ds['bar'] = Variable(['x', 'y'], np.random.randn(10, 20))
    foo = self.ds['foo']
    bar = self.ds['bar']

    # from dataset array:
    expected = DataArray(np.array([foo.values, bar.values]),
                         dims=['w', 'x', 'y'])
    actual = concat([foo, bar], 'w')
    self.assertDataArrayEqual(expected, actual)

    # from iteration:
    grouped = [g for _, g in foo.groupby('x')]
    stacked = concat(grouped, self.ds['x'])
    self.assertDataArrayIdentical(foo, stacked)

    with self.assertRaisesRegexp(ValueError, 'not identical'):
        concat([foo, bar], compat='identical')
def combinevars(ds_in, dat_vars, new_dim_name='new_dim', combinevarname='new_var'):
    ds_out = xray.Dataset()
    ds_out = xray.concat([ds_in[dv] for dv in dat_vars], dim='new_dim')
    ds_out = ds_out.rename({'new_dim': new_dim_name})
    ds_out.coords[new_dim_name] = dat_vars
    ds_out.name = combinevarname
    return ds_out
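# A hypothetical usage sketch of combinevars (the dataset, variable names and
# dimension name below are invented for illustration; they are not taken from
# the original source): stack two per-run variables along a new dimension.
import numpy as np
import xray

ds_in = xray.Dataset({'tas_run1': ('time', np.random.randn(12)),
                      'tas_run2': ('time', np.random.randn(12))})
tas = combinevars(ds_in, ['tas_run1', 'tas_run2'],
                  new_dim_name='ensemble', combinevarname='tas')
# Intended result: a DataArray named 'tas' with dimensions ('ensemble', 'time'),
# where tas['ensemble'] holds the original variable names.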
def read_all(self, channels, start_offset, end_offset, buffer):
    evs = self.events

    raw_bin_wrappers, original_eeg_files = self.__create_bin_readers()

    # we need to create rawbinwrappers first to figure out sample rate
    # before calling __compute_time_series_length()
    time_series_length = self.__compute_time_series_length()

    time_series_data = np.empty((len(channels), len(evs), time_series_length),
                                dtype=np.float) * np.nan

    events = []
    newdat_list = []

    # for s,src in enumerate(usources):
    for s, (src, eegfile) in enumerate(zip(raw_bin_wrappers, original_eeg_files)):
        ind = np.atleast_1d(evs.eegfile == eegfile)

        if len(ind) == 1:
            events.append(evs[0])
        else:
            events.append(evs[ind])

        # print event_offsets
        # print "Loading %d events from %s" % (ind.sum(),src)
        # get the timeseries for those events
        newdat = src.get_event_data_xray_simple(channels=channels, events=events,
                                                start_offset=start_offset,
                                                end_offset=end_offset,
                                                buffer=buffer)
        newdat_list.append(newdat)

    start_extend_time = time.time()
    # new code
    eventdata = xray.concat(newdat_list, dim='events')
    end_extend_time = time.time()

    # concatenate (must eventually check that dims match)
    # ORIGINAL CODE
    tdim = eventdata['time']
    cdim = eventdata['channels']
    # srate = eventdata.samplerate
    srate = eventdata.attrs['samplerate']

    eventdata_xray = eventdata
    # eventdata_xray = xray.DataArray(np.squeeze(eventdata.values),
    #                                 coords=[cdim, tdim], dims=['channels', 'time'])
    # eventdata_xray.attrs['samplerate'] = eventdata.attrs['samplerate']

    if not self.keep_buffer:
        # trimming buffer data samples
        number_of_buffer_samples = self.get_number_of_samples_for_interval(self.buffer_time)
        if number_of_buffer_samples > 0:
            eventdata_xray = eventdata_xray[:, :, number_of_buffer_samples:-number_of_buffer_samples]

    return eventdata_xray
def filter(self): event_data_dict = OrderedDict() for eegfile_name, data in self.data_dict.items(): evs = self.events[self.events.eegfile == eegfile_name] samplerate = data.attrs['samplerate'] # used in constructing time_axis offset_time_array = data['time'].values['eegoffset'] event_chunk_size, start_point_shift = self.get_event_chunk_size_and_start_point_shift(ev=evs[0], samplerate=samplerate, offset_time_array=offset_time_array) event_time_axis = np.linspace(-self.buffer + self.time_shift, self.event_duration + self.buffer + self.time_shift, event_chunk_size) data_list = [] shape = None for i, ev in enumerate(evs): # print ev.eegoffset start_chop_pos = np.where(offset_time_array >= ev.eegoffset)[0][0] start_chop_pos += start_point_shift selector_array = np.arange(start=start_chop_pos, stop=start_chop_pos + event_chunk_size) # ev_array = eeg_session_data[:,:,selector_array] # ORIG CODE chopped_data_array = data.isel(time=selector_array) chopped_data_array['time'] = event_time_axis chopped_data_array['events'] = [i] data_list.append(chopped_data_array) # print i ev_concat_data = xray.concat(data_list, dim='events') # replacing simple events axis (consecutive integers) with recarray of events ev_concat_data['events'] = evs ev_concat_data.attrs['samplerate'] = samplerate ev_concat_data.attrs['time_shift'] = self.time_shift ev_concat_data.attrs['event_duration'] = self.event_duration ev_concat_data.attrs['buffer'] = self.buffer event_data_dict[eegfile_name] = TimeSeriesX(ev_concat_data) break # REMOVE THIS return event_data_dict
def test_concat_lazy(self):
    import dask.array as da
    arrays = [DataArray(
        da.from_array(InaccessibleArray(np.zeros((3, 3))), 3),
        dims=['x', 'y']) for _ in range(2)]
    # should not raise
    combined = concat(arrays, dim='z')
    self.assertEqual(combined.shape, (2, 3, 3))
    self.assertEqual(combined.dims, ('z', 'x', 'y'))
def ssn_average(var, onset, retreat, season):
    years = var['year'].values
    for y, year in enumerate(years):
        days = season_days(season, year, onset.values[y], retreat.values[y])
        var_yr = atm.subset(var, {'year' : (year, year)}, squeeze=False)
        var_yr = var_yr.sel(dayrel=days).mean(dim='dayrel')
        if y == 0:
            var_out = var_yr
        else:
            var_out = xray.concat([var_out, var_yr], dim='year')
    return var_out
def test_lazy_array(self):
    u = self.eager_array
    v = self.lazy_array

    self.assertLazyAndAllClose(u, v)
    self.assertLazyAndAllClose(-u, -v)
    self.assertLazyAndAllClose(u.T, v.T)
    self.assertLazyAndAllClose(u.mean(), v.mean())
    self.assertLazyAndAllClose(1 + u, 1 + v)

    actual = concat([v[:2], v[2:]], 'x')
    self.assertLazyAndAllClose(u, actual)
def concat_plevs(datadir, year, varnm, plevs, pdim, version):
    pname = 'Height'
    for i, plev in enumerate(plevs):
        filenm = datafile(datadir, varnm, plev, year, version)
        print('Reading ' + filenm)
        with xray.open_dataset(filenm) as ds:
            var_in = ds[varnm].load()
            var_in = atm.expand_dims(var_in, pname, plev, axis=1)
        if i == 0:
            var = var_in
        else:
            var = xray.concat([var, var_in], dim=pname)
    return var
def wraparound_lon(arr, n=1, radians=True):
    """Append wrap-around points in longitude to the DataArray or Dataset.

    The longitude array must span from 0 to 360.  While this will usually be
    the case, it's not guaranteed.  Some pre-processing step should be
    implemented in the future that forces this to be the case.
    """
    circumf = 2*np.pi if radians else 360.
    edge_left = arr.isel(**{LON_STR: 0})
    edge_left[LON_STR] += circumf
    edge_right = arr.isel(**{LON_STR: -1})
    edge_right[LON_STR] -= circumf
    return xray.concat([edge_right, arr, edge_left], dim=LON_STR)
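# A hypothetical usage sketch of wraparound_lon (the array and values are
# invented for illustration).  It assumes the module-level constant LON_STR
# is 'lon' and that longitudes are stored in degrees, hence radians=False.
import numpy as np
import xray

LON_STR = 'lon'
lon = np.arange(0., 360., 30.)
arr = xray.DataArray(np.random.randn(lon.size), coords={'lon': lon}, dims=['lon'])
wrapped = wraparound_lon(arr, radians=False)
# wrapped should span lon = -30 to 360: one wrap-around point appended on
# each side of the original 12 longitudes.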
def fromshot(shot, camera, los=None):
    """ Return the calibrated signal of the XtomoCamera LoS chosen.

    Parameters
    ----------
    shot : int or MDSConnection
        Shot number or connection instance
    camera : int
        Number of the XTOMO camera
    los : int or sequence of ints
        Optional argument with lines of sight (LoS) of the chosen camera.
        If None, it loads all the 20 channels

    Returns
    -------
    Calibrated XTOMO signals.

    Examples
    --------
    >>> import tcv
    >>> cam = tcv.diag.XtomoCamera.fromshot(50766, camera=1, los=[4, 5])
    """
    if los is None:
        los = np.arange(20) + 1
    else:
        los = np.atleast_1d(los)

    values = []
    with tcv.shot(shot) as conn:
        for channel in XtomoCamera.channels(shot, camera, los=los):
            values.append(conn.tdi(channel, dims='time'))

    data = xray.concat(values, dim='los')
    data['los'] = los

    # Remove the offset before the shot
    data -= data.where(data.time < 0).mean(dim='time')

    # and now we normalize conveniently
    # FIXME: use xray's infrastructure to compute this
    gain, amp = XtomoCamera.gains(shot, camera, los=los)
    data *= np.transpose(np.tile(gain, (data.values.shape[1], 1)) /
                         np.tile(amp, (data.values.shape[1], 1)))

    data.attrs.update({'camera': camera})

    return data
def grabDateRange(input_dir, data, start='2010-01-01', end=dt.datetime.now()):
    rng = pd.date_range(start, end, freq='D')
    filerng = ['raw_MpalaTower_%04d_%03d.nc' % (date.year, date.dayofyear)
               for date in rng]
    ds_list = []
    fileNames = []
    FILEDIR = input_dir + data + '/'
    for fileName in set(filerng) & set(os.listdir(FILEDIR)):
        fileNames.append(fileName)
    fileNames.sort()
    for fileName in fileNames:
        ds_list.append(xray.open_dataset(FILEDIR + fileName, decode_times=True))
    ds = xray.Dataset()
    ds = xray.concat((ds_list[0:]), dim='time')
    return ds
def daily_rel2onset(data, d_onset, npre, npost):
    """Return subset of daily data aligned relative to onset day.

    Parameters
    ----------
    data : xray.DataArray
        Daily data.
    d_onset : ndarray
        Array of onset date (day of year) for each year.
    npre, npost : int
        Number of days before and after onset to extract.

    Returns
    -------
    data_out : xray.DataArray
        Subset of N days of daily data for each year, where
        N = npre + npost + 1 and the day dimension is dayrel = day - d_onset.
    """
    name, attrs, coords, dimnames = atm.meta(data)
    yearnm = atm.get_coord(data, 'year', 'name')
    daynm = atm.get_coord(data, 'day', 'name')
    years = atm.makelist(atm.get_coord(data, 'year'))

    if isinstance(d_onset, xray.DataArray):
        d_onset = d_onset.values
    else:
        d_onset = atm.makelist(d_onset)

    relnm = daynm + 'rel'

    for y, year in enumerate(years):
        dmin, dmax = d_onset[y] - npre, d_onset[y] + npost
        subset_dict = {yearnm : (year, None), daynm : (dmin, dmax)}
        sub = atm.subset(data, subset_dict)
        sub = sub.rename({daynm : relnm})
        sub[relnm] = sub[relnm] - d_onset[y]
        sub[relnm].attrs['long_name'] = 'Day of year relative to onset day'
        if y == 0:
            data_out = sub
        else:
            data_out = xray.concat([data_out, sub], dim=yearnm)

    data_out.attrs['d_onset'] = d_onset
    return data_out
def pad_zl_to_zp1(self, array, fill_value=0., zlname='Zl', zp1name='Zp1'):
    """Pad an array located at zl points such that it is located at zp1
    points.  An additional fill value is required for the bottom point.

    Parameters
    ----------
    array : xray DataArray
        The array to pad.  Must have the coordinate zl.
    fill_value : number, optional
        The value to be used at the bottom point.
    zlname : str, optional
        The variable name for the zl point
    zp1name : str, optional
        The variable name for the zp1 point

    Returns
    -------
    padded : xray DataArray
        Padded array with vertical coordinate zp1.
    """
    coords, dims = self._get_coords_from_dims(array.dims)
    zdim = dims.index(zlname)
    # shape of the new array to concat at the bottom
    shape = list(array.shape)
    shape[zdim] = 1
    # replace Zl with the bottom level
    coords[zlname] = np.atleast_1d(self.ds[zp1name][-1].data)
    # an array holding the fill value at the bottom
    # need different behavior for numpy vs dask
    if array.chunks:
        chunks = list(array.data.chunks)
        chunks[zdim] = (1,)
        zarr = fill_value * da.ones(shape, dtype=array.dtype, chunks=chunks)
        zeros = xray.DataArray(zarr, coords, dims).chunk()
    else:
        zarr = fill_value * np.ones(shape, array.dtype)
        zeros = xray.DataArray(zarr, coords, dims)
    newarray = xray.concat([array, zeros], dim=zlname).rename({zlname: zp1name})
    if newarray.chunks:
        # this assumes that there was only one chunk in the vertical to
        # begin with; how can we do that better?
        return newarray.chunk({zp1name: len(newarray[zp1name])})
    else:
        return newarray
def test_concat_promote_shape(self):
    # mixed dims within variables
    objs = [Dataset({}, {'x': 0}), Dataset({'x': [1]})]
    actual = concat(objs, 'x')
    expected = Dataset({'x': [0, 1]})
    self.assertDatasetIdentical(actual, expected)

    objs = [Dataset({'x': [0]}), Dataset({}, {'x': 1})]
    actual = concat(objs, 'x')
    self.assertDatasetIdentical(actual, expected)

    # mixed dims between variables
    objs = [Dataset({'x': [2], 'y': 3}), Dataset({'x': [4], 'y': 5})]
    actual = concat(objs, 'x')
    expected = Dataset({'x': [2, 4], 'y': ('x', [3, 5])})
    self.assertDatasetIdentical(actual, expected)

    # mixed dims in coord variable
    objs = [Dataset({'x': [0]}, {'y': -1}),
            Dataset({'x': [1]}, {'y': ('x', [-2])})]
    actual = concat(objs, 'x')
    expected = Dataset({'x': [0, 1]}, {'y': ('x', [-1, -2])})
    self.assertDatasetIdentical(actual, expected)

    # scalars with mixed lengths along concat dim -- values should repeat
    objs = [Dataset({'x': [0]}, {'y': -1}),
            Dataset({'x': [1, 2]}, {'y': -2})]
    actual = concat(objs, 'x')
    expected = Dataset({}, {'y': ('x', [-1, -2, -2])})
    self.assertDatasetIdentical(actual, expected)

    # broadcast 1d x 1d -> 2d
    objs = [Dataset({'z': ('x', [-1])}, {'x': [0], 'y': [0]}),
            Dataset({'z': ('y', [1])}, {'x': [1], 'y': [0]})]
    actual = concat(objs, 'x')
    expected = Dataset({'z': (('x', 'y'), [[-1], [1]])})
    self.assertDatasetIdentical(actual, expected)
def binned_probability_plot(variable, bin_divs, ax=None, **kwdargs):
    """
    Creates a plot showing the binned probability of the data in variable.
    """
    ax, _ = utils.axis_figure(axis=ax)

    if variable.dims == ('time',):
        variable = (xray.concat([variable], 'realization')
                    .transpose('time', 'realization'))
    assert variable.dims == ('time', 'realization')
    n_times, n_real = variable.shape

    # compute the binned probabilities
    probs = bin_probs(variable, bin_divs)

    # default to a blue colormap placed in the background
    kwdargs['cmap'] = kwdargs.get('cmap', plt.cm.get_cmap('Blues'))
    kwdargs['zorder'] = kwdargs.get('zorder', -100)

    # plot the probabilities
    y, x = np.meshgrid(np.arange(bin_divs.size),
                       np.arange(variable['time'].size + 1))
    pm = ax.pcolormesh(x, y, probs.values,
                       norm=plt.Normalize(vmin=0., vmax=1.),
                       **kwdargs)
    return pm
def create_sync_delimited(self):
    # Currently this code assumes that the request only contained one stream;
    # otherwise the code will break.  In the future we may squash all of the
    # differing streams into one result.
    if len(self.stream_param_map) > 1:
        raise StreamEngineException("Should not have more than one stream in request for delimited data", 500)

    datasets = []
    output_vars = []
    for sk, deployment, ds in self.stream_data.groups(self.stream_param_map.keys()[0]):
        ds, output_vars = self._fix_for_sync(ds, self.stream_param_map[sk])
        datasets.append(ds)

    final_data = xray.concat(datasets, dim='obs')
    final_data['obs'].values = numpy.arange(0, final_data['obs'].size, dtype=numpy.int32)

    key_vars = ['subsite', 'node', 'sensor', 'stream']
    # output columns in the correct order
    output_vars = output_vars[:4] + key_vars + output_vars[4:]

    final_data.attrs['subsite'] = datasets[0].attrs['subsite']
    final_data.attrs['node'] = datasets[0].attrs['node']
    final_data.attrs['sensor'] = datasets[0].attrs['sensor']
    final_data.attrs['stream'] = datasets[0].attrs['stream']

    with tempfile.NamedTemporaryFile() as tf:
        self._write_csv_out(tf.file, final_data, output_vars, set(key_vars))
        tf.seek(0)
        return tf.read()
def fromshot(shotnum, los=None):
    """ Read the ECE LFS data from the specified shot """
    with tcv.shot(shotnum) as conn:
        try:
            frequency = conn.tdi(r'\results::ece_lfs:rf_freqs')
        except:  # FIXME: catch more specific exception
            frequency = Lfs.DEFAULT_FREQUENCIES

    type(frequency)

    if los:
        # remember that we use the los as index for channels
        los = np.atleast_1d(los) - 1
    else:
        los = np.arange(frequency.size)

    values = []
    used_los = []
    with tcv.shot(shotnum) as conn:
        for i, channel in enumerate(Lfs.channels(conn.shot)):
            if i in los:
                values.append(conn.tdi(channel, dims='time'))
                used_los.append(i + 1)

    data = xray.concat(values, dim='los')
    data.coords['los'] = used_los
    # TODO: add frequency coordinate

    # Normalize to mean value
    mean = data.where(data.time < 0).mean(dim='time')
    data = (data - mean) / mean

    # Fill-in data attributes
    with tcv.shot(shotnum) as conn:
        data.attrs['z_antenna'] = Lfs.zpos(conn)

    return data
def test_concat_data_vars(self):
    data = Dataset({'foo': ('x', np.random.randn(10))})
    objs = [data.isel(x=slice(5)), data.isel(x=slice(5, None))]
    for data_vars in ['minimal', 'different', 'all', [], ['foo']]:
        actual = concat(objs, dim='x', data_vars=data_vars)
        self.assertDatasetIdentical(data, actual)
def calculate(dbf, comps, phases, mode=None, output='GM', fake_points=False, **kwargs): """ Sample the property surface of 'output' containing the specified components and phases. Model parameters are taken from 'dbf' and any state variables (T, P, etc.) can be specified as keyword arguments. Parameters ---------- dbf : Database Thermodynamic database containing the relevant parameters. comps : str or sequence Names of components to consider in the calculation. phases : str or sequence Names of phases to consider in the calculation. mode : string, optional See 'make_callable' docstring for details. output : string, optional Model attribute to sample. fake_points : bool, optional (Default: False) If True, the first few points of the output surface will be fictitious points used to define an equilibrium hyperplane guaranteed to be above all the other points. This is used for convex hull computations. points : ndarray or a dict of phase names to ndarray, optional Columns of ndarrays must be internal degrees of freedom (site fractions), sorted. If this is not specified, points will be generated automatically. pdens : int, a dict of phase names to int, or a seq of both, optional Number of points to sample per degree of freedom. model : Model, a dict of phase names to Model, or a seq of both, optional Model class to use for each phase. Returns ------- xray.Dataset of the sampled attribute as a function of state variables Examples -------- None yet. """ # Here we check for any keyword arguments that are special, i.e., # there may be keyword arguments that aren't state variables pdens_dict = unpack_kwarg(kwargs.pop('pdens', 2000), default_arg=2000) points_dict = unpack_kwarg(kwargs.pop('points', None), default_arg=None) model_dict = unpack_kwarg(kwargs.pop('model', Model), default_arg=Model) callable_dict = unpack_kwarg(kwargs.pop('callables', None), default_arg=None) if isinstance(phases, str): phases = [phases] if isinstance(comps, str): comps = [comps] components = [x for x in sorted(comps) if not x.startswith('VA')] # Convert keyword strings to proper state variable objects # If we don't do this, sympy will get confused during substitution statevar_dict = collections.OrderedDict((v.StateVariable(key), unpack_condition(value)) \ for (key, value) in sorted(kwargs.items())) str_statevar_dict = collections.OrderedDict((str(key), unpack_condition(value)) \ for (key, value) in statevar_dict.items()) all_phase_data = [] comp_sets = {} largest_energy = -np.inf maximum_internal_dof = 0 # Consider only the active phases active_phases = dict((name.upper(), dbf.phases[name.upper()]) \ for name in unpack_phases(phases)) for phase_name, phase_obj in sorted(active_phases.items()): # Build the symbolic representation of the energy mod = model_dict[phase_name] # if this is an object type, we need to construct it if isinstance(mod, type): try: model_dict[phase_name] = mod = mod(dbf, comps, phase_name) except DofError: # we can't build the specified phase because the # specified components aren't found in every sublattice # we'll just skip it logger.warning( """Suspending specified phase %s due to some sublattices containing only unspecified components""", phase_name) continue if points_dict[phase_name] is None: try: out = getattr(mod, output) maximum_internal_dof = max(maximum_internal_dof, len(out.atoms(v.SiteFraction))) except AttributeError: raise AttributeError( 'Missing Model attribute {0} specified for {1}'.format( output, mod.__class__)) else: maximum_internal_dof = max( maximum_internal_dof, 
np.asarray(points_dict[phase_name]).shape[-1]) for phase_name, phase_obj in sorted(active_phases.items()): try: mod = model_dict[phase_name] except KeyError: continue # Construct an ordered list of the variables variables, sublattice_dof = generate_dof(phase_obj, mod.components) # Build the "fast" representation of that model if callable_dict[phase_name] is None: out = getattr(mod, output) # As a last resort, treat undefined symbols as zero # But warn the user when we do this # This is consistent with TC's behavior undefs = list(out.atoms(Symbol) - out.atoms(v.StateVariable)) for undef in undefs: out = out.xreplace({undef: float(0)}) logger.warning( 'Setting undefined symbol %s for phase %s to zero', undef, phase_name) comp_sets[phase_name] = make_callable(out, \ list(statevar_dict.keys()) + variables, mode=mode) else: comp_sets[phase_name] = callable_dict[phase_name] points = points_dict[phase_name] if points is None: # Eliminate pure vacancy endmembers from the calculation vacancy_indices = list() for idx, sublattice in enumerate(phase_obj.constituents): active_in_subl = sorted( set(phase_obj.constituents[idx]).intersection(comps)) if 'VA' in active_in_subl and 'VA' in sorted(comps): vacancy_indices.append(active_in_subl.index('VA')) if len(vacancy_indices) != len(phase_obj.constituents): vacancy_indices = None logger.debug('vacancy_indices: %s', vacancy_indices) # Add all endmembers to guarantee their presence points = endmember_matrix(sublattice_dof, vacancy_indices=vacancy_indices) # Sample composition space for more points if sum(sublattice_dof) > len(sublattice_dof): points = np.concatenate( (points, point_sample(sublattice_dof, pdof=pdens_dict[phase_name]))) # If there are nontrivial sublattices with vacancies in them, # generate a set of points where their fraction is zero and renormalize for idx, sublattice in enumerate(phase_obj.constituents): if 'VA' in set(sublattice) and len(sublattice) > 1: var_idx = variables.index( v.SiteFraction(phase_name, idx, 'VA')) addtl_pts = np.copy(points) # set vacancy fraction to log-spaced between 1e-10 and 1e-6 addtl_pts[:, var_idx] = np.power( 10.0, -10.0 * (1.0 - addtl_pts[:, var_idx])) # renormalize site fractions cur_idx = 0 for ctx in sublattice_dof: end_idx = cur_idx + ctx addtl_pts[:, cur_idx:end_idx] /= \ addtl_pts[:, cur_idx:end_idx].sum(axis=1)[:, None] cur_idx = end_idx # add to points matrix points = np.concatenate((points, addtl_pts), axis=0) # Filter out nan's that may have slipped in if we sampled too high a vacancy concentration # Issues with this appear to be platform-dependent points = points[~np.isnan(points).any(axis=-1)] # Ensure that points has the correct dimensions and dtype points = np.atleast_2d(np.asarray(points, dtype=np.float)) phase_ds = _compute_phase_values(phase_obj, components, variables, str_statevar_dict, points, comp_sets[phase_name], output, maximum_internal_dof) # largest_energy is really only relevant if fake_points is set if fake_points: largest_energy = max(phase_ds[output].max(), largest_energy) all_phase_data.append(phase_ds) if fake_points: if output != 'GM': raise ValueError( 'fake_points=True should only be used with output=\'GM\'') phase_ds = _generate_fake_points(components, statevar_dict, largest_energy, output, maximum_internal_dof) final_ds = xray.concat(itertools.chain([phase_ds], all_phase_data), dim='points') else: # speedup for single-phase case (found by profiling) if len(all_phase_data) > 1: final_ds = xray.concat(all_phase_data, dim='points') else: final_ds = all_phase_data[0] if (not 
fake_points) and (len(all_phase_data) == 1): pass else: # Reset the points dimension to use a single global index final_ds['points'] = np.arange(len(final_ds.points)) return final_ds
ens5 = ens5.mean(dim='lon') timerange = range(31,39) #timerange = range(35,47) #timerange = range(0,59) O3_c=getattr(ensc,var).sel(lat=p) O3_1=getattr(ens1,var).sel(lat=p) O3_2=getattr(ens2,var).sel(lat=p) O3_3=getattr(ens3,var).sel(lat=p) O3_4=getattr(ens4,var).sel(lat=p) O3_5=getattr(ens5,var).sel(lat=p) O3_c=xray.concat([O3_c,O3_c,O3_c,O3_c,O3_c],dim='time') O3_c=O3_c.isel(time=timerange) O3_1=O3_1.isel(time=timerange) O3_2=O3_2.isel(time=timerange) O3_3=O3_3.isel(time=timerange) O3_4=O3_4.isel(time=timerange) O3_5=O3_5.isel(time=timerange) if 'BR' in var: O3_c=O3_c*1e9 O3_1=O3_1*1e9 O3_2=O3_2*1e9 O3_3=O3_3*1e9 O3_4=O3_4*1e9 O3_5=O3_5*1e9
dims=['channels', 'events', 'frequency', 'time'] ) bp_sess_1 = session_data.values[1, 0, :] - session_data.values[2, 0, :] print bp_sess_1 pow_sess_new_1 = phase_pow_multi(freqs, bp_sess_1, to_return='power', samplerates=ev_data.attrs['samplerate']) print pow_sess_new_1 pow_xray_1 = xray.DataArray(pow_sess_new_1.reshape(1, 1, pow_sess_new_1.shape[0], pow_sess_new_1.shape[1]), coords=[['003_004'], np.arange(1), freqs, session_data['time']], dims=['channels', 'events', 'frequency', 'time'] ) pow_combined = xray.concat([pow_xray_0, pow_xray_1], dim='channels') pow_combined.attrs['samplerate'] = ev_data.attrs['samplerate'] edcw = EventDataChopper(events=base_events, event_duration=1.6, buffer=1.0, data_dict={base_events[0].eegfile: pow_combined}) chopped_wavelets = edcw.filter() print # class EventDataChopper(PropertiedObject): # _descriptors = [ # # TypeValTuple('time_shift', float, 0.0), # TypeValTuple('event_duration', float, 0.0),
#CREM.parameters() # Temporary container for read-in data arrays = {} def label(variable, desc, unit_long, unit_short): """Add some descriptive attributes to an xray.DataArray.""" arrays[variable].attrs.update({'desc': desc, 'unit_long': unit_long, 'unit_short': unit_short}) # Cell: # GDP temp = [raw[case].extract('gdp_ref') for case in cases] arrays['GDP'] = xray.concat(temp, dim=cases).sel(rs=CREM.set('r')) .rename({'rs': 'r'}) label('GDP', 'Gross domestic product', 'billions of U.S. dollars, constant at 2007', '10⁹ USD') arrays['GDP_aagr'] = ((arrays['GDP'][:,:,1:].values / arrays['GDP'][:,:,:-1]) ** (1 / CREM.extract('lp')) - 1) * 100 label('GDP_aagr', 'Gross domestic product, average annual growth rate', 'percent', '%') arrays['GDP_delta'] = (arrays['GDP'] / arrays['GDP'].sel(case='bau') - 1) * 100 label('GDP_delta', 'Change in gross domestic product relative to BAU', 'percent', '%') # Cell:
event_time_axis = np.linspace(-1.0, 2.6, len(selector_array)) ev_array = ts[:, :, selector_array] ev_array['time'] = event_time_axis ev_array['events'] = [i] ev_data_list.append(ev_array) # ev_data_list.append(ts[:,:,selector_array].values) print i # print ev_array if i == 2: break eventdata = xray.concat(ev_data_list, dim='events') # eventdata =np.concatenate(ev_data_list,axis=1) # eventdata = xray.concat(ev_data_list,dim='events') print eventdata # eegoffset_time_array = ts['time'].values['eegoffset'] # # ev_data_list = [] # for i, ev in enumerate(base_events_0): # print ev.eegoffset # start_offset = ev.eegoffset-int(np.ceil(buffer*samplerate)) # end_offset = ev.eegoffset+int(np.ceil((ev_duration+buffer)*samplerate)) # print "start_offset,end_offset, size=",start_offset,end_offset,end_offset-start_offset
def read(self, channels): evs = self.events raw_bin_wrappers, original_eeg_files = self.__create_bin_readers() # we need to create rawbinwrappers first to figure out sample rate before calling __compute_time_series_length() time_series_length = self.__compute_time_series_length() time_series_data = np.empty( (len(channels), len(evs), time_series_length), dtype=np.float) * np.nan # usources = np.unique(raw_bin_wrappers) ordered_indices = np.arange(len(evs)) event_indices_list = [] events = [] newdat_list = [] eventdata = None # for s,src in enumerate(usources): for s, (src, eegfile) in enumerate(zip(raw_bin_wrappers, original_eeg_files)): ind = np.atleast_1d(evs.eegfile == eegfile) event_indices_list.append(ordered_indices[ind]) # if verbose: # if not s%10: # print 'Reading event %d'%s if len(ind) == 1: event_offsets = evs['eegoffset'] events.append(evs) else: event_offsets = evs[ind]['eegoffset'] events.append(evs[ind]) # print event_offsets #print "Loading %d events from %s" % (ind.sum(),src) # get the timeseries for those events newdat = src.get_event_data_xray(channels, event_offsets, self.start_time, self.end_time, self.buffer_time, resampled_rate=None, filt_freq=None, filt_type=None, filt_order=None, keep_buffer=self.keep_buffer, loop_axis=None, num_mp_procs=0, eoffset='eegoffset', eoffset_in_time=False) newdat_list.append(newdat) event_indices_array = np.hstack(event_indices_list) event_indices_restore_sort_order_array = event_indices_array.argsort() start_extend_time = time.time() #new code eventdata = xray.concat(newdat_list, dim='events') end_extend_time = time.time() # concatenate (must eventually check that dims match) # ORIGINAL CODE tdim = eventdata['time'] cdim = eventdata['channels'] # srate = eventdata.samplerate srate = eventdata.attrs['samplerate'] events = np.concatenate(events).view(Events) eventdata_xray = xray.DataArray(eventdata.values, coords=[cdim, events, tdim], dims=['channels', 'events', 'time']) eventdata_xray.attrs['samplerate'] = eventdata.attrs['samplerate'] eventdata_xray = eventdata_xray[:, event_indices_restore_sort_order_array, :] #### RESTORE THIS if not self.keep_buffer: # trimming buffer data samples number_of_buffer_samples = self.get_number_of_samples_for_interval( self.buffer_time) if number_of_buffer_samples > 0: eventdata_xray = eventdata_xray[:, :, number_of_buffer_samples: -number_of_buffer_samples] return TimeSeriesX(eventdata_xray)
def test_concat_errors(self):
    data = create_test_data()
    split_data = [data.isel(dim1=slice(3)),
                  data.isel(dim1=slice(3, None))]

    with self.assertRaisesRegexp(ValueError, 'must supply at least one'):
        concat([], 'dim1')

    with self.assertRaisesRegexp(ValueError, 'are not coordinates'):
        concat([data, data], 'new_dim', coords=['not_found'])

    with self.assertRaisesRegexp(ValueError, 'global attributes not'):
        data0, data1 = deepcopy(split_data)
        data1.attrs['foo'] = 'bar'
        concat([data0, data1], 'dim1', compat='identical')
    self.assertDatasetIdentical(
        data, concat([data0, data1], 'dim1', compat='equals'))

    with self.assertRaisesRegexp(ValueError, 'encountered unexpected'):
        data0, data1 = deepcopy(split_data)
        data1['foo'] = ('bar', np.random.randn(10))
        concat([data0, data1], 'dim1')

    with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
        data0, data1 = deepcopy(split_data)
        data1['dim2'] = 2 * data1['dim2']
        concat([data0, data1], 'dim1', coords='minimal')

    with self.assertRaisesRegexp(ValueError, 'it is not 1-dimensional'):
        concat([data0, data1], 'dim1')

    with self.assertRaisesRegexp(ValueError, 'compat.* invalid'):
        concat(split_data, 'dim1', compat='foobar')

    with self.assertRaisesRegexp(ValueError, 'unexpected value for'):
        concat([data, data], 'new_dim', coords='foobar')

    with self.assertRaisesRegexp(
            ValueError, 'coordinate in some datasets but not others'):
        concat([Dataset({'x': 0}), Dataset({'x': [1]})], dim='z')

    with self.assertRaisesRegexp(
            ValueError, 'coordinate in some datasets but not others'):
        concat([Dataset({'x': 0}), Dataset({}, {'x': 1})], dim='z')

    with self.assertRaisesRegexp(ValueError, 'no longer a valid'):
        concat([data, data], 'new_dim', mode='different')
    with self.assertRaisesRegexp(ValueError, 'no longer a valid'):
        concat([data, data], 'new_dim', concat_over='different')
def test_concat_dim_is_variable(self):
    objs = [Dataset({'x': 0}), Dataset({'x': 1})]
    coord = Variable('y', [3, 4])
    expected = Dataset({'x': ('y', [0, 1]), 'y': [3, 4]})
    actual = concat(objs, coord)
    self.assertDatasetIdentical(actual, expected)
def roll(self, array, n, dim):
    """Clone of numpy.roll for xray DataArrays."""
    left = array.isel(**{dim: slice(None, -n)})
    right = array.isel(**{dim: slice(-n, None)})
    return xray.concat([right, left], dim=dim)
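# A hypothetical usage sketch of roll (the array and values are invented for
# illustration); `grid` stands in for an instance of the class that defines
# the method.  Rolling by n=2 moves the last two elements to the front,
# matching numpy.roll.
import numpy as np
import xray

arr = xray.DataArray(np.arange(5), dims=['x'])
rolled = grid.roll(arr, n=2, dim='x')
# rolled.values -> array([3, 4, 0, 1, 2])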
'''
NAME
    ECMWF PyToolBox - Merging two or more NetCDF4 files
PURPOSE
    This script merges two or more NetCDF4 files and exports a single
    NetCDF4 file containing the combined datasets.
PROGRAMMER(S)
    Shayan Davarzani ([email protected])
    [Master of Engineering - Civil Engineering]
REFERENCES
    Institute of Earth Sciences Coders --
        https://iescoders.com/2017/10/03/reading-netcdf4-data-in-python/
    Dr. Ali Asghar Golshani -- My Best and Scientist Teacher --
        https://ir.linkedin.com/in/aliasghar-golshani-57a78414/
    IA University Central Tehran Branch -- https://www.iau.ac.ir/
'''
import xray

# input files for merging
urls = ["tez\persian-gulf-1979-wave.nc", "tez\persian-gulf-1980-1981-wave.nc"]
datasets = [xray.open_dataset(url) for url in urls]
merged = xray.concat(datasets, 'forecast_time')
merged.to_netcdf('all-data.nc')
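# Optional variant, not part of the original script: newer releases of the
# library (and its successor, xarray) provide open_mfdataset, which combines
# the open and concat steps.  A minimal sketch assuming the same input files
# and that the installed version ships open_mfdataset (it also requires dask).
import xray

merged = xray.open_mfdataset(["tez\persian-gulf-1979-wave.nc",
                              "tez\persian-gulf-1980-1981-wave.nc"],
                             concat_dim='forecast_time')
merged.to_netcdf('all-data-mf.nc')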
# set up some vars for the output naming standardization cmor_table = os.path.splitext( os.path.basename(fn))[0].split('_')[1] experiment = scenario = os.path.splitext( os.path.basename(fn))[0].split('_')[-2] scenario = os.path.splitext(os.path.basename(fn))[0].split('_')[-3] if not os.path.exists(output_dir): os.makedirs(output_dir) # run the concatenation and the output to a new netcdf file # --> and we are writing in a hack to get around the darn issue with GFDL-CM3 # we could just run them all with the reduce workaround, but I will keep both # in hopes that the library improves. if 'GFDL' in model: ds = reduce(lambda x, y: xray.concat([x, y], 'time'), (xray.open_dataset(i) for i in files)) else: ds = xray.concat([xray.open_dataset(i).load() for i in files], 'time') new_ds = year_greater_yearlimit_workaround( ds, int(begin_year_fnout[:4]), int(end_year_fnout[:4]), int(str(begin_year_in)[:4]), int(str(end_year_in)[:4])) begin_year_fnout = str( int(begin_year_fnout[:4]) + (int(begin_year_in[:4]) - int(begin_year_fnout[:4])) ) + '01' # to update the output naming # output name generation new_fn_base = '_'.join([ variable, cmor_table, model, scenario, experiment,