Example #1
def wrapyear(data, data_prev, data_next, daymin, daymax, year=None):
    """Wrap daily data from previous and next years for extended day ranges.
    """
    daynm = atm.get_coord(data, 'day', 'name')

    def leap_adjust(data, year):
        data = atm.squeeze(data)
        ndays = 365
        if year is not None and atm.isleap(year):
            ndays += 1
        else:
            # Remove NaN for day 366 in non-leap year
            data = atm.subset(data, {'day' : (1, ndays)})
        return data, ndays

    data, ndays = leap_adjust(data, year)
    if data_prev is not None:
        data_prev, ndays_prev = leap_adjust(data_prev, year - 1)
        data_prev[daynm] = data_prev[daynm] - ndays_prev
        data_out = xray.concat([data_prev, data], dim=daynm)
    else:
        data_out = data
    if data_next is not None:
        data_next, _ = leap_adjust(data_next, year + 1)
        data_next[daynm] = data_next[daynm] + ndays
        data_out = xray.concat([data_out, data_next], dim=daynm)
    data_out = atm.subset(data_out, {daynm : (daymin, daymax)})

    return data_out
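A minimal, self-contained sketch of the same wrap-around idea using plain xarray objects (the `atm` helpers above come from the author's own toolkit and are not reproduced here; all names below are illustrative):

import numpy as np
import xarray as xr  # 'xray' was this package's earlier name

def wrap_days(prev_yr, this_yr, next_yr, daymin, daymax):
    # Shift the neighbouring years' day coordinates so the three years
    # form one continuous, monotonically increasing day axis.
    prev_yr = prev_yr.assign_coords(day=prev_yr['day'] - prev_yr.sizes['day'])
    next_yr = next_yr.assign_coords(day=next_yr['day'] + this_yr.sizes['day'])
    combined = xr.concat([prev_yr, this_yr, next_yr], dim='day')
    return combined.sel(day=slice(daymin, daymax))

days = np.arange(1, 366)
prev_yr, this_yr, next_yr = (
    xr.DataArray(np.random.rand(365), coords={'day': days}, dims='day')
    for _ in range(3))
extended = wrap_days(prev_yr, this_yr, next_yr, -30, 395)  # days -30..395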
Example #2
    def test_concat_size0(self):
        data = create_test_data()
        split_data = [data.isel(dim1=slice(0, 0)), data]
        actual = concat(split_data, 'dim1')
        self.assertDatasetIdentical(data, actual)

        actual = concat(split_data[::-1], 'dim1')
        self.assertDatasetIdentical(data, actual)
Example #4
    def test_concat(self):
        # TODO: simplify and split this test case

        # drop the third dimension to keep things relatively understandable
        data = create_test_data().drop('dim3')

        split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))]
        self.assertDatasetIdentical(data, concat(split_data, 'dim1'))

        def rectify_dim_order(dataset):
            # return a new dataset with all variable dimensions transposed into
            # the order in which they are found in `data`
            return Dataset(dict((k, v.transpose(*data[k].dims))
                                for k, v in iteritems(dataset.data_vars)),
                           dataset.coords,
                           attrs=dataset.attrs)

        for dim in ['dim1', 'dim2']:
            datasets = [g for _, g in data.groupby(dim, squeeze=False)]
            self.assertDatasetIdentical(data, concat(datasets, dim))
            self.assertDatasetIdentical(data, concat(datasets, data[dim]))
            self.assertDatasetIdentical(
                data, concat(datasets, data[dim], coords='minimal'))

            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            concat_over = [
                k for k, v in iteritems(data.coords)
                if dim in v.dims and k != dim
            ]
            actual = concat(datasets, data[dim], coords=concat_over)
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

            actual = concat(datasets, data[dim], coords='different')
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

        # make sure the coords argument behaves as expected
        data.coords['extra'] = ('dim4', np.arange(3))
        for dim in ['dim1', 'dim2']:
            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            actual = concat(datasets, data[dim], coords='all')
            expected = np.array(
                [data['extra'].values for _ in range(data.dims[dim])])
            self.assertArrayEqual(actual['extra'].values, expected)

            actual = concat(datasets, data[dim], coords='different')
            self.assertDataArrayEqual(data['extra'], actual['extra'])
            actual = concat(datasets, data[dim], coords='minimal')
            self.assertDataArrayEqual(data['extra'], actual['extra'])

        # verify that the dim argument takes precedence over
        # concatenating dataset variables of the same name
        dim = (2 * data['dim1']).rename('dim1')
        datasets = [g for _, g in data.groupby('dim1', squeeze=False)]
        expected = data.copy()
        expected['dim1'] = dim
        self.assertDatasetIdentical(expected, concat(datasets, dim))
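The coords handling asserted above can be seen in a small stand-alone snippet (written against the modern xarray package, with a synthetic dataset): with coords='all' a coordinate that does not contain the concatenation dimension is stacked along it, while 'different'/'minimal' leave it untouched when it is equal across the pieces.

import numpy as np
import xarray as xr

ds = xr.Dataset({'foo': (('x', 'y'), np.zeros((4, 3)))},
                coords={'extra': ('y', [10, 20, 30])})
pieces = [ds.isel(x=[0, 1]), ds.isel(x=[2, 3])]

# 'minimal' (and 'different', since 'extra' is equal in both pieces)
# keeps 'extra' one-dimensional ...
same = xr.concat(pieces, dim='x', coords='minimal')
assert same['extra'].dims == ('y',)

# ... while 'all' concatenates it along 'x' as well.
stacked = xr.concat(pieces, dim='x', coords='all')
assert stacked['extra'].dims == ('x', 'y')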
Example #5
 def test_concat_constant_index(self):
     # GH425
     ds1 = Dataset({'foo': 1.5}, {'y': 1})
     ds2 = Dataset({'foo': 2.5}, {'y': 1})
     expected = Dataset({'foo': ('y', [1.5, 2.5]), 'y': [1, 1]})
     for mode in ['different', 'all', ['foo']]:
         actual = concat([ds1, ds2], 'y', data_vars=mode)
         self.assertDatasetIdentical(expected, actual)
     with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
         concat([ds1, ds2], 'y', data_vars='minimal')
Example #7
    def test_concat(self):
        # TODO: simplify and split this test case

        # drop the third dimension to keep things relatively understandable
        data = create_test_data().drop('dim3')

        split_data = [data.isel(dim1=slice(3)),
                      data.isel(dim1=slice(3, None))]
        self.assertDatasetIdentical(data, concat(split_data, 'dim1'))

        def rectify_dim_order(dataset):
            # return a new dataset with all variable dimensions transposed into
            # the order in which they are found in `data`
            return Dataset(dict((k, v.transpose(*data[k].dims))
                                for k, v in iteritems(dataset.data_vars)),
                           dataset.coords, attrs=dataset.attrs)

        for dim in ['dim1', 'dim2']:
            datasets = [g for _, g in data.groupby(dim, squeeze=False)]
            self.assertDatasetIdentical(data, concat(datasets, dim))
            self.assertDatasetIdentical(
                data, concat(datasets, data[dim]))
            self.assertDatasetIdentical(
                data, concat(datasets, data[dim], coords='minimal'))

            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            concat_over = [k for k, v in iteritems(data.coords)
                           if dim in v.dims and k != dim]
            actual = concat(datasets, data[dim], coords=concat_over)
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

            actual = concat(datasets, data[dim], coords='different')
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

        # make sure the coords argument behaves as expected
        data.coords['extra'] = ('dim4', np.arange(3))
        for dim in ['dim1', 'dim2']:
            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            actual = concat(datasets, data[dim], coords='all')
            expected = np.array([data['extra'].values
                                 for _ in range(data.dims[dim])])
            self.assertArrayEqual(actual['extra'].values, expected)

            actual = concat(datasets, data[dim], coords='different')
            self.assertDataArrayEqual(data['extra'], actual['extra'])
            actual = concat(datasets, data[dim], coords='minimal')
            self.assertDataArrayEqual(data['extra'], actual['extra'])

        # verify that the dim argument takes precedence over
        # concatenating dataset variables of the same name
        dim = (2 * data['dim1']).rename('dim1')
        datasets = [g for _, g in data.groupby('dim1', squeeze=False)]
        expected = data.copy()
        expected['dim1'] = dim
        self.assertDatasetIdentical(expected, concat(datasets, dim))
Example #8
    def test_concat(self):
        ds = Dataset({
            'foo': (['x', 'y'], np.random.random((10, 20))),
            'bar': (['x', 'y'], np.random.random((10, 20)))
        })
        foo = ds['foo']
        bar = ds['bar']

        # from dataset array:
        expected = DataArray(np.array([foo.values, bar.values]),
                             dims=['w', 'x', 'y'])
        actual = concat([foo, bar], 'w')
        self.assertDataArrayEqual(expected, actual)
        # from iteration:
        grouped = [g for _, g in foo.groupby('x')]
        stacked = concat(grouped, ds['x'])
        self.assertDataArrayIdentical(foo, stacked)
        # with an index as the 'dim' argument
        stacked = concat(grouped, ds.indexes['x'])
        self.assertDataArrayIdentical(foo, stacked)

        actual = concat([foo[0], foo[1]],
                        pd.Index([0, 1])).reset_coords(drop=True)
        expected = foo[:2].rename({'x': 'concat_dim'})
        self.assertDataArrayIdentical(expected, actual)

        actual = concat([foo[0], foo[1]], [0, 1]).reset_coords(drop=True)
        expected = foo[:2].rename({'x': 'concat_dim'})
        self.assertDataArrayIdentical(expected, actual)

        with self.assertRaisesRegexp(ValueError, 'not identical'):
            concat([foo, bar], dim='w', compat='identical')

        with self.assertRaisesRegexp(ValueError, 'not a valid argument'):
            concat([foo, bar], dim='w', data_vars='minimal')
Example #9
    def test_concat(self):
        ds = Dataset({'foo': (['x', 'y'], np.random.random((10, 20))),
                      'bar': (['x', 'y'], np.random.random((10, 20)))})
        foo = ds['foo']
        bar = ds['bar']

        # from dataset array:
        expected = DataArray(np.array([foo.values, bar.values]),
                             dims=['w', 'x', 'y'])
        actual = concat([foo, bar], 'w')
        self.assertDataArrayEqual(expected, actual)
        # from iteration:
        grouped = [g for _, g in foo.groupby('x')]
        stacked = concat(grouped, ds['x'])
        self.assertDataArrayIdentical(foo, stacked)
        # with an index as the 'dim' argument
        stacked = concat(grouped, ds.indexes['x'])
        self.assertDataArrayIdentical(foo, stacked)

        actual = concat([foo[0], foo[1]], pd.Index([0, 1])).reset_coords(drop=True)
        expected = foo[:2].rename({'x': 'concat_dim'})
        self.assertDataArrayIdentical(expected, actual)

        actual = concat([foo[0], foo[1]], [0, 1]).reset_coords(drop=True)
        expected = foo[:2].rename({'x': 'concat_dim'})
        self.assertDataArrayIdentical(expected, actual)

        with self.assertRaisesRegexp(ValueError, 'not identical'):
            concat([foo, bar], dim='w', compat='identical')

        with self.assertRaisesRegexp(ValueError, 'not a valid argument'):
            concat([foo, bar], dim='w', data_vars='minimal')
Example #10
 def test_concat_coords(self):
     data = Dataset({'foo': ('x', np.random.randn(10))})
     expected = data.assign_coords(c=('x', [0] * 5 + [1] * 5))
     objs = [data.isel(x=slice(5)).assign_coords(c=0),
             data.isel(x=slice(5, None)).assign_coords(c=1)]
     for coords in ['different', 'all', ['c']]:
         actual = concat(objs, dim='x', coords=coords)
         self.assertDatasetIdentical(expected, actual)
     for coords in ['minimal', []]:
         with self.assertRaisesRegexp(ValueError, 'not equal across'):
             concat(objs, dim='x', coords=coords)
Example #11
    def test_concat_do_not_promote(self):
        # GH438
        objs = [Dataset({'y': ('t', [1])}, {'x': 1}),
                Dataset({'y': ('t', [2])}, {'x': 1})]
        expected = Dataset({'y': ('t', [1, 2])}, {'x': 1, 't': [0, 0]})
        actual = concat(objs, 't')
        self.assertDatasetIdentical(expected, actual)

        objs = [Dataset({'y': ('t', [1])}, {'x': 1}),
                Dataset({'y': ('t', [2])}, {'x': 2})]
        with self.assertRaises(ValueError):
            concat(objs, 't', coords='minimal')
Example #12
 def test_concat_coords(self):
     data = Dataset({'foo': ('x', np.random.randn(10))})
     expected = data.assign_coords(c=('x', [0] * 5 + [1] * 5))
     objs = [
         data.isel(x=slice(5)).assign_coords(c=0),
         data.isel(x=slice(5, None)).assign_coords(c=1)
     ]
     for coords in ['different', 'all', ['c']]:
         actual = concat(objs, dim='x', coords=coords)
         self.assertDatasetIdentical(expected, actual)
     for coords in ['minimal', []]:
         with self.assertRaisesRegexp(ValueError, 'not equal across'):
             concat(objs, dim='x', coords=coords)
Example #13
    def cen_diff(cls, arr, dim, spacing=1, is_coord=False, do_edges_one_sided=False):
        """Centered differencing of the DataArray or Dataset.

        :param arr: Data to be center-differenced.
        :type arr: `xray.DataArray` or `xray.Dataset`
        :param str dim: Dimension over which to perform the differencing.
        :param int spacing: How many gridpoints over to use.  Size of resulting
                            array depends on this value.
        :param do_edges_one_sided: Whether or not to fill in the edge cells
                                   that don't have the needed neighbor cells
                                   for the stencil.  If `True`, use one-sided
                                   differencing with the same order of accuracy
                                   as `order`, and the outputted array is the
                                   same shape as `arr`.

                                   If `False`, the outputted array has a length
                                   in the computed axis reduced by `order`.
        """
        if spacing < 1:
            raise ValueError("Centered differencing cannot have spacing < 1")
        left = arr.isel(**{dim: slice(0, -spacing)})
        right = arr.isel(**{dim: slice(spacing, None)})
        # Centered differencing = sum of intermediate forward differences
        diff = cls.fwd_diff1(right, dim, is_coord=is_coord) + cls.bwd_diff1(left, dim, is_coord=is_coord)
        if do_edges_one_sided:
            left = arr.isel(**{dim: slice(0, 2)})
            right = arr.isel(**{dim: slice(-2, None)})
            diff_left = cls.fwd_diff1(left, dim, is_coord=is_coord)
            diff_right = cls.bwd_diff1(right, dim, is_coord=is_coord)
            diff = xray.concat([diff_left, diff, diff_right], dim=dim)
        return diff
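The identity this routine relies on, that a centered difference is the sum of the intermediate forward and backward differences, can be checked numerically with plain xarray (using `shift` in place of the `fwd_diff1`/`bwd_diff1` helpers, which are not shown here):

import numpy as np
import xarray as xr

y = xr.DataArray(np.sin(np.linspace(0.0, 2.0 * np.pi, 50)), dims='x')

fwd = (y.shift(x=-1) - y).isel(x=slice(0, -1))   # y[i+1] - y[i], i = 0..48
bwd = (y - y.shift(x=1)).isel(x=slice(1, None))  # y[i] - y[i-1], i = 1..49

# centered difference over two gridpoints: y[i+1] - y[i-1], i = 1..48
centered = (y.shift(x=-1) - y.shift(x=1)).isel(x=slice(1, -1))

np.testing.assert_allclose(
    centered.values,
    (fwd.isel(x=slice(1, None)) + bwd.isel(x=slice(0, -1))).values)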
Example #14
def wrapyear_all(data, daymin, daymax):
    """Wrap daily data to extended ranges over each year in yearly data."""

    def extract_year(data, year, years):
        if year in years:
            data_out = atm.subset(data, {'year' : (year, year)})
        else:
            data_out = None
        return data_out

    daynm = atm.get_coord(data, 'day', 'name')
    days = np.arange(daymin, daymax + 1)
    days = xray.DataArray(days, name=daynm, coords={daynm : days})
    years = atm.get_coord(data, 'year')
    yearnm = atm.get_coord(data, 'year', 'name')
    for y, year in enumerate(years):
        year_prev, year_next = year - 1, year + 1
        var = extract_year(data, year, years)
        var_prev = extract_year(data, year_prev, years)
        var_next = extract_year(data, year_next, years)
        var_out = wrapyear(var, var_prev, var_next, daymin, daymax, year)
        var_out = atm.expand_dims(var_out, 'year', year, axis=0)
        var_out = var_out.reindex_like(days)
        if y == 0:
            data_out = var_out
        else:
            data_out = xray.concat([data_out, var_out], dim=yearnm)

    return data_out
Example #15
def fetch_full_san_data(stream_key, time_range, location_metadata=None):
    """
    Given a time range and stream key, generate all data in the interval using data
    from the SAN.
    :param stream_key:
    :param time_range:
    :return:
    """
    if location_metadata is None:
        location_metadata = get_san_location_metadata(stream_key, time_range)
    # get which bins we can gather data from
    ref_des_dir, dir_string = get_SAN_directories(stream_key, split=True)
    if not os.path.exists(ref_des_dir):
        log.warning("Reference Designator does not exist in offloaded DataSAN")
        return None
    data = []
    next_index = 0
    for time_bin in location_metadata.bin_list:
        direct = dir_string.format(time_bin)
        if os.path.exists(direct):
            # get data from all of the  deployments
            deployments = os.listdir(direct)
            for deployment in deployments:
                full_path = os.path.join(direct, deployment)
                if os.path.isdir(full_path):
                    new_data = get_deployment_data(full_path, stream_key.stream_name, -1, time_range,
                                                   index_start=next_index)
                    if new_data is not None:
                        data.append(new_data)
                        # Keep track of indexes so they are unique in the final dataset
                        next_index += len(new_data['index'])
    if len(data) == 0:
        return None
    return xray.concat(data, dim='index')
Example #16
def concat_to_nc( filelist, output_filename, dim='time', begin_time=None, end_time=None, nc_format='NETCDF4', **kwargs ):
	'''
	take a list of consecutive netcdf files (made for CMIP5 data) and stack them into a
	single larger netcdf file.  This was necessary to overcome some bugginess in how
	MFDataset deals with different calendar units on different files.  This is
	technically valid CF-compliant metadata, but is tricky to work with.  This hack allows
	us to get around some of this unpredictable behavior.

	PARAMETERS:
	-----------
	filelist = [list] list of string file paths to the sorted netcdf files to stack together
	output_filename = [str] path to and name of the output file to be generated (.nc extension)
	dim = [str] dimension to stack on -- default is 'time'
	begin_time = [str] PANDAS style datetime string syntax -- used in xray
	end_time = [str] PANDAS style datetime string syntax -- used in xray
	nc_format = [str] output NetCDF format desired. valid strings are:
					'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'
					default is 'NETCDF4'
	**kwargs -- potential future arguments or overloaded args to pass through (none implemented)

	RETURNS:
	--------

	output_filename as string, with the important side-effect of writing data to disk

	'''
	import xray
	with xray.concat([ xray.open_dataset( i ).load() for i in filelist ], dim ) as ds:
		# time slicer condition
		if begin_time != None and end_time != None:
			ds = ds.loc[ { dim:slice( begin_time, end_time ) } ]
		if os.path.exists( output_filename ):
			os.remove( output_filename )
		ds.to_netcdf( output_filename, mode='w', format=nc_format )
	return output_filename
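A hypothetical call of the helper above (the file paths are made up; the files are assumed to be pre-sorted, consecutive CMIP5-style netcdf files):

from glob import glob

files = sorted(glob('/path/to/cmip5/tas_Amon_*.nc'))
concat_to_nc(files, '/path/to/tas_Amon_1950-2005.nc', dim='time',
             begin_time='1950-01-01', end_time='2005-12-31')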
Example #17
    def test_concat_do_not_promote(self):
        # GH438
        objs = [
            Dataset({'y': ('t', [1])}, {'x': 1}),
            Dataset({'y': ('t', [2])}, {'x': 1})
        ]
        expected = Dataset({'y': ('t', [1, 2])}, {'x': 1, 't': [0, 0]})
        actual = concat(objs, 't')
        self.assertDatasetIdentical(expected, actual)

        objs = [
            Dataset({'y': ('t', [1])}, {'x': 1}),
            Dataset({'y': ('t', [2])}, {'x': 2})
        ]
        with self.assertRaises(ValueError):
            concat(objs, 't', coords='minimal')
Example #18
def compile_datasets(datasets):
    """
    Given a list of datasets, possibly containing None, return a single
    dataset with unique indexes, sorted by the 'time' parameter.
    :param datasets:
    :return:
    """
    # filter out the Nones
    datasets = filter(None, datasets)
    if len(datasets) == 0:
        return None
    datasets.sort(key=lambda val: val['time'].values[0])
    # now determine if they are in order or not..
    start = 0
    end = 0
    idx = 0
    for ds in datasets:
        ns = ds['time'].min()
        ne = ds['time'].max()
        # Determine if the max and the min are all in order
        start = ns
        end = ne
        new_index = [i for i in range(idx, idx + len(ds['index']))]
        ds['index'] = new_index
        idx = new_index[-1] + 1
    dataset = xray.concat(datasets, dim='index')
    sorted_idx = dataset.time.argsort()
    dataset = dataset.reindex({'index': sorted_idx})
    return dataset
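The renumber-then-concatenate step of compile_datasets in isolation, on two toy datasets that each carry an 'index' dimension and a 'time' variable (synthetic data, not real stream output):

import numpy as np
import xarray as xr

a = xr.Dataset({'time': ('index', [3.0, 4.0])}, coords={'index': [0, 1]})
b = xr.Dataset({'time': ('index', [1.0, 2.0])}, coords={'index': [0, 1]})

pieces = sorted([a, b], key=lambda ds: ds['time'].values[0])
offset = 0
for ds in pieces:
    # give every dataset a unique, consecutive block of index values
    ds['index'] = np.arange(offset, offset + ds.sizes['index'])
    offset += ds.sizes['index']

combined = xr.concat(pieces, dim='index')
# combined['index'] is 0..3 and combined['time'] is monotonically increasing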
Example #20
def read_data(data_dir, lat, lon, resample=None):
    files = sorted([os.path.join(data_dir, f) for f in os.listdir(data_dir)])
    dss = [xr.open_dataset(f).sel(lat=lat, lon=lon, method='nearest') for f in files]
    ds = xr.concat([dr.load() for dr in dss], 'time')
    if resample is not None:
        ds = ds.resample(resample, 'time')
    return ds
Example #21
    def test_concat(self):
        self.ds['bar'] = Variable(['x', 'y'], np.random.randn(10, 20))
        foo = self.ds['foo']
        bar = self.ds['bar']
        # from dataset array:
        expected = DataArray(np.array([foo.values, bar.values]),
                             dims=['w', 'x', 'y'])
        actual = concat([foo, bar], 'w')
        self.assertDataArrayEqual(expected, actual)
        # from iteration:
        grouped = [g for _, g in foo.groupby('x')]
        stacked = concat(grouped, self.ds['x'])
        self.assertDataArrayIdentical(foo, stacked)

        with self.assertRaisesRegexp(ValueError, 'not identical'):
            concat([foo, bar], compat='identical')
Example #22
    def combinevars(ds_in,dat_vars,new_dim_name='new_dim',combinevarname='new_var'):
        ds_out = xray.Dataset()
        ds_out = xray.concat([ds_in[dv] for dv in dat_vars],dim='new_dim')
        ds_out = ds_out.rename({'new_dim': new_dim_name})
        ds_out.coords[new_dim_name] = dat_vars
        ds_out.name = combinevarname

        return ds_out
Example #23
    def read_all(self,channels, start_offset,  end_offset, buffer):
        evs = self.events

        raw_bin_wrappers, original_eeg_files = self.__create_bin_readers()

        # we need to create rawbinwrappers first to figure out sample rate before calling __compute_time_series_length()
        time_series_length = self.__compute_time_series_length()

        time_series_data = np.empty((len(channels),len(evs),time_series_length),
                             dtype=np.float)*np.nan

        events = []

        newdat_list = []

        # for s,src in enumerate(usources):
        for s,(src,eegfile) in enumerate(zip(raw_bin_wrappers,original_eeg_files)):
            ind = np.atleast_1d( evs.eegfile == eegfile)

            if len(ind) == 1:
                events.append(evs[0])
            else:
                events.append(evs[ind])

            # print event_offsets
            #print "Loading %d events from %s" % (ind.sum(),src)
            # get the timeseries for those events
            newdat = src.get_event_data_xray_simple(channels=channels,events=events,
                                                    start_offset=start_offset,end_offset=end_offset,buffer=buffer)

            newdat_list.append(newdat)


        start_extend_time = time.time()
        #new code
        eventdata = xray.concat(newdat_list,dim='events')
        end_extend_time = time.time()


        # concatenate (must eventually check that dims match)
        # ORIGINAL CODE
        tdim = eventdata['time']
        cdim = eventdata['channels']
        # srate = eventdata.samplerate
        srate = eventdata.attrs['samplerate']

        eventdata_xray = eventdata
        # eventdata_xray = xray.DataArray(np.squeeze(eventdata.values), coords=[cdim,tdim], dims=['channels','time'])
        # eventdata_xray.attrs['samplerate'] = eventdata.attrs['samplerate']


        if not self.keep_buffer:
            # trimming buffer data samples
            number_of_buffer_samples =  self.get_number_of_samples_for_interval(self.buffer_time)
            if number_of_buffer_samples > 0:
                eventdata_xray = eventdata_xray[:,:,number_of_buffer_samples:-number_of_buffer_samples]

        return eventdata_xray
Example #24
    def filter(self):

        event_data_dict = OrderedDict()

        for eegfile_name, data in self.data_dict.items():

            evs = self.events[self.events.eegfile == eegfile_name]

            samplerate = data.attrs['samplerate']

            # used in constructing time_axis
            offset_time_array = data['time'].values['eegoffset']

            event_chunk_size, start_point_shift = self.get_event_chunk_size_and_start_point_shift(ev=evs[0],
                                                                                                  samplerate=samplerate,
                                                                                                  offset_time_array=offset_time_array)

            event_time_axis = np.linspace(-self.buffer + self.time_shift,
                                          self.event_duration + self.buffer + self.time_shift,
                                          event_chunk_size)

            data_list = []

            shape = None

            for i, ev in enumerate(evs):
                # print ev.eegoffset
                start_chop_pos = np.where(offset_time_array >= ev.eegoffset)[0][0]
                start_chop_pos += start_point_shift
                selector_array = np.arange(start=start_chop_pos, stop=start_chop_pos + event_chunk_size)

                # ev_array = eeg_session_data[:,:,selector_array] # ORIG CODE

                chopped_data_array = data.isel(time=selector_array)

                chopped_data_array['time'] = event_time_axis
                chopped_data_array['events'] = [i]

                data_list.append(chopped_data_array)

                # print i

            ev_concat_data = xray.concat(data_list, dim='events')

            # replacing simple events axis (consecutive integers) with recarray of events
            ev_concat_data['events'] = evs

            ev_concat_data.attrs['samplerate'] = samplerate
            ev_concat_data.attrs['time_shift'] = self.time_shift
            ev_concat_data.attrs['event_duration'] = self.event_duration
            ev_concat_data.attrs['buffer'] = self.buffer

            event_data_dict[eegfile_name] = TimeSeriesX(ev_concat_data)

            break  # REMOVE THIS

        return event_data_dict
Example #25
def read_data(data_dir, lat, lon, resample=None):
    files = sorted([os.path.join(data_dir, f) for f in os.listdir(data_dir)])
    dss = [
        xr.open_dataset(f).sel(lat=lat, lon=lon, method='nearest')
        for f in files
    ]
    ds = xr.concat([dr.load() for dr in dss], 'time')
    if resample is not None:
        ds = ds.resample(resample, 'time')
    return ds
Example #26
    def test_concat_lazy(self):
        import dask.array as da

        arrays = [DataArray(
            da.from_array(InaccessibleArray(np.zeros((3, 3))), 3),
            dims=['x', 'y']) for _ in range(2)]
        # should not raise
        combined = concat(arrays, dim='z')
        self.assertEqual(combined.shape, (2, 3, 3))
        self.assertEqual(combined.dims, ('z', 'x', 'y'))
Example #27
    def test_concat_lazy(self):
        import dask.array as da

        arrays = [
            DataArray(da.from_array(InaccessibleArray(np.zeros((3, 3))), 3),
                      dims=['x', 'y']) for _ in range(2)
        ]
        # should not raise
        combined = concat(arrays, dim='z')
        self.assertEqual(combined.shape, (2, 3, 3))
        self.assertEqual(combined.dims, ('z', 'x', 'y'))
Example #28
def ssn_average(var, onset, retreat, season):
    years = var['year'].values
    for y, year in enumerate(years):
        days = season_days(season, year, onset.values[y], retreat.values[y])
        var_yr = atm.subset(var, {'year' : (year, year)}, squeeze=False)
        var_yr = var_yr.sel(dayrel=days).mean(dim='dayrel')
        if y == 0:
            var_out = var_yr
        else:
            var_out = xray.concat([var_out, var_yr], dim='year')
    return var_out
Example #29
    def test_lazy_array(self):
        u = self.eager_array
        v = self.lazy_array

        self.assertLazyAndAllClose(u, v)
        self.assertLazyAndAllClose(-u, -v)
        self.assertLazyAndAllClose(u.T, v.T)
        self.assertLazyAndAllClose(u.mean(), v.mean())
        self.assertLazyAndAllClose(1 + u, 1 + v)

        actual = concat([v[:2], v[2:]], "x")
        self.assertLazyAndAllClose(u, actual)
Example #30
    def test_lazy_array(self):
        u = self.eager_array
        v = self.lazy_array

        self.assertLazyAndAllClose(u, v)
        self.assertLazyAndAllClose(-u, -v)
        self.assertLazyAndAllClose(u.T, v.T)
        self.assertLazyAndAllClose(u.mean(), v.mean())
        self.assertLazyAndAllClose(1 + u, 1 + v)

        actual = concat([v[:2], v[2:]], 'x')
        self.assertLazyAndAllClose(u, actual)
Example #31
def concat_plevs(datadir, year, varnm, plevs, pdim, version):
    pname = 'Height'
    for i, plev in enumerate(plevs):
        filenm = datafile(datadir, varnm, plev, year, version)
        print('Reading ' + filenm)
        with xray.open_dataset(filenm) as ds:
            var_in = ds[varnm].load()
            var_in = atm.expand_dims(var_in, pname, plev, axis=1)
        if i == 0:
            var = var_in
        else:
            var = xray.concat([var, var_in], dim=pname)
    return var
Example #32
def wraparound_lon(arr, n=1, radians=True):
    """Append wrap-around points in longitude to the DataArray or Dataset.

    The longitude array must span from 0 to 360.  While this will usually be the
    case, it's not guaranteed.  Some pre-processing step should be implemented
    in the future that forces this to be the case.
    """
    circumf = 2*np.pi if radians else 360.
    edge_left = arr.isel(**{LON_STR: 0})
    edge_left[LON_STR] += circumf
    edge_right = arr.isel(**{LON_STR: -1})
    edge_right[LON_STR] -= circumf
    return xray.concat([edge_right, arr, edge_left], dim=LON_STR)
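A small illustration of the same wrap-around pattern with a plain longitude coordinate in degrees (so radians=False in the terms of the function above; the values are made up):

import numpy as np
import xarray as xr

lon = np.arange(0.0, 360.0, 90.0)        # 0, 90, 180, 270
arr = xr.DataArray(np.arange(4.0), coords={'lon': lon}, dims='lon')

left = arr.isel(lon=0)
left['lon'] = left['lon'] + 360.0        # copy of the 0 deg point relabelled as 360 deg
right = arr.isel(lon=-1)
right['lon'] = right['lon'] - 360.0      # copy of the 270 deg point relabelled as -90 deg

wrapped = xr.concat([right, arr, left], dim='lon')
# wrapped['lon'] -> [-90, 0, 90, 180, 270, 360]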
Example #33
    def fromshot(shot, camera, los=None):
        """
        Return the calibrated signal of the XtomoCamera LoS chosen.

        Parameters
        ----------
        shot : int or MDSConnection
            Shot number or connection instance
        camera : int
            Number of the XTOMO camera
        los : int or sequence of ints
            Optional argument with lines of sight (LoS) of the chosen camera.
            If None, it loads all the 20 channels

        Returns
        -------
        Calibrated XTOMO signals.

        Examples
        --------
        >>> import tcv
        >>> cam = tcv.diag.XtomoCamera.fromshot(50766, camera=1, los=[4, 5])
        """

        if los is None:
            los = np.arange(20) + 1
        else:
            los = np.atleast_1d(los)

        values = []
        with tcv.shot(shot) as conn:
            for channel in XtomoCamera.channels(shot, camera, los=los):
                values.append(conn.tdi(channel, dims='time'))

        data = xray.concat(values, dim='los')
        data['los'] = los

        # Remove the offset before the shot
        data -= data.where(data.time < 0).mean(dim='time')

        # and now we normalize conveniently
        # FIXME: use xray's infrastructure to compute this
        gain, amp = XtomoCamera.gains(shot, camera, los=los)
        data *= np.transpose(np.tile(gain, (data.values.shape[1], 1)) /
                             np.tile(amp, (data.values.shape[1], 1)))

        data.attrs.update({'camera': camera})

        return data
Example #34
def grabDateRange(input_dir,data,start='2010-01-01',end=dt.datetime.now()):
    rng = pd.date_range(start,end,freq='D')
    filerng = ['raw_MpalaTower_%04d_%03d.nc'%
               (date.year,date.dayofyear) for date in rng]
    ds_list = []
    fileNames = []
    FILEDIR = input_dir+data+'/'
    for fileName in set(filerng) & set(os.listdir(FILEDIR)):
        fileNames.append(fileName)
    fileNames.sort()
    for fileName in fileNames:
        ds_list.append(xray.open_dataset(FILEDIR+fileName,decode_times=True)) 
    ds = xray.Dataset()
    ds = xray.concat((ds_list[0:]),dim='time')
    return ds
Example #35
def daily_rel2onset(data, d_onset, npre, npost):
    """Return subset of daily data aligned relative to onset day.

    Parameters
    ----------
    data : xray.DataArray
        Daily data.
    d_onset : ndarray
        Array of onset date (day of year) for each year.
    npre, npost : int
        Number of days before and after onset to extract.

    Returns
    -------
    data_out : xray.DataArray
        Subset of N days of daily data for each year, where
        N = npre + npost + 1 and the day dimension is
        dayrel = day - d_onset.
    """

    name, attrs, coords, dimnames = atm.meta(data)
    yearnm = atm.get_coord(data, 'year', 'name')
    daynm = atm.get_coord(data, 'day', 'name')
    years = atm.makelist(atm.get_coord(data, 'year'))

    if isinstance(d_onset, xray.DataArray):
        d_onset = d_onset.values
    else:
        d_onset = atm.makelist(d_onset)

    relnm = daynm + 'rel'

    for y, year in enumerate(years):
        dmin, dmax = d_onset[y] - npre, d_onset[y] + npost
        subset_dict = {yearnm : (year, None), daynm : (dmin, dmax)}
        sub = atm.subset(data, subset_dict)
        sub = sub.rename({daynm : relnm})
        sub[relnm] = sub[relnm] - d_onset[y]
        sub[relnm].attrs['long_name'] = 'Day of year relative to onset day'
        if y == 0:
            data_out = sub
        else:
            data_out = xray.concat([data_out, sub], dim=yearnm)

    data_out.attrs['d_onset'] = d_onset

    return data_out
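A compact illustration of the day-relative-to-onset idea with plain xarray (synthetic data; the `atm` helpers above are not used):

import numpy as np
import xarray as xr

days = np.arange(1, 366)
data = xr.DataArray(np.random.rand(2, 365),
                    coords={'year': [2000, 2001], 'day': days},
                    dims=['year', 'day'])
d_onset = [140, 152]      # onset day of year for each year
npre, npost = 5, 10

pieces = []
for y, year in enumerate(data['year'].values):
    sub = data.sel(year=year, day=slice(d_onset[y] - npre, d_onset[y] + npost))
    sub = sub.rename({'day': 'dayrel'})
    sub['dayrel'] = sub['dayrel'] - d_onset[y]
    pieces.append(sub)

aligned = xr.concat(pieces, dim='year')
# aligned has dims ('year', 'dayrel') with dayrel running from -npre to +npost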
Example #36
    def pad_zl_to_zp1(self, array, fill_value=0., zlname='Zl', zp1name='Zp1'):
        """Pad an array located at zl points such that it is located at
        zp1 points. An additional fill value is required for the bottom point.

        Parameters
        ----------
        array : xray DataArray
            The array to pad. Must have the coordinate zl.
        fill_value : number, optional
            The value to be used at the bottom point.
        zlname : str, optional
            The variable name for the zl point
        zp1name : str, optional
            The variable name for the zp1 point

        Returns
        -------
        padded : xray DataArray
            Padded array with vertical coordinate zp1.
        """
        coords, dims = self._get_coords_from_dims(array.dims)
        zdim = dims.index(zlname)
        # shape of the new array to concat at the bottom
        shape = list(array.shape)
        shape[zdim] = 1
        # replace Zl with the bottom level
        coords[zlname] = np.atleast_1d(self.ds[zp1name][-1].data)
        # an array of zeros at the bottom
        # need different behavior for numpy vs dask
        if array.chunks:
            chunks = list(array.data.chunks)
            chunks[zdim] = (1, )
            zarr = fill_value * da.ones(
                shape, dtype=array.dtype, chunks=chunks)
            zeros = xray.DataArray(zarr, coords, dims).chunk()
        else:
            zarr = np.zeros(shape, array.dtype)
            zeros = xray.DataArray(zarr, coords, dims)
        newarray = xray.concat([array, zeros],
                               dim=zlname).rename({zlname: zp1name})
        if newarray.chunks:
            # this assumes that there was only one chunk in the vertical to begin with
            # how can we do that better
            return newarray.chunk({zp1name: len(newarray[zp1name])})
        else:
            return newarray
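A stripped-down version of the padding step, with plain xarray and no grid object (coordinate names and values are only illustrative):

import numpy as np
import xarray as xr

Zl = [0.0, -10.0, -20.0]
Zp1 = [0.0, -10.0, -20.0, -30.0]
arr = xr.DataArray(np.ones(3), coords={'Zl': Zl}, dims='Zl')

# one extra level at the bottom, filled with a constant value
bottom = xr.DataArray(np.zeros(1), coords={'Zl': [Zp1[-1]]}, dims='Zl')

padded = xr.concat([arr, bottom], dim='Zl').rename({'Zl': 'Zp1'})
# padded now lives on the four Zp1 points instead of the three Zl points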
Example #37
    def pad_zl_to_zp1(self, array, fill_value=0., zlname='Zl', zp1name='Zp1'):
        """Pad an array located at zl points such that it is located at
        zp1 points. An additional fill value is required for the bottom point.

        Parameters
        ----------
        array : xray DataArray
            The array to pad. Must have the coordinate zl.
        fill_value : number, optional
            The value to be used at the bottom point.
        zlname : str, optional
            The variable name for the zl point
        zp1name : str, optional
            The variable name for the zp1 point

        Returns
        -------
        padded : xray DataArray
            Padded array with vertical coordinate zp1.
        """
        coords, dims = self._get_coords_from_dims(array.dims)
        zdim = dims.index(zlname)
        # shape of the new array to concat at the bottom
        shape = list(array.shape)
        shape[zdim] = 1
        # replace Zl with the bottom level
        coords[zlname] = np.atleast_1d(self.ds[zp1name][-1].data)
        # an array of zeros at the bottom
        # need different behavior for numpy vs dask
        if array.chunks:
            chunks = list(array.data.chunks)
            chunks[zdim] = (1,)
            zarr = fill_value * da.ones(shape, dtype=array.dtype, chunks=chunks)
            zeros = xray.DataArray(zarr, coords, dims).chunk()
        else:
            zarr = np.zeros(shape, array.dtype)
            zeros = xray.DataArray(zarr, coords, dims)
        newarray = xray.concat([array, zeros], dim=zlname).rename({zlname: zp1name})
        if newarray.chunks:
            # this assumes that there was only one chunk in the vertical to begin with
            # how can we do that better
            return newarray.chunk({zp1name: len(newarray[zp1name])})
        else:
            return newarray
Example #38
    def test_concat_promote_shape(self):
        # mixed dims within variables
        objs = [Dataset({}, {'x': 0}), Dataset({'x': [1]})]
        actual = concat(objs, 'x')
        expected = Dataset({'x': [0, 1]})
        self.assertDatasetIdentical(actual, expected)

        objs = [Dataset({'x': [0]}), Dataset({}, {'x': 1})]
        actual = concat(objs, 'x')
        self.assertDatasetIdentical(actual, expected)

        # mixed dims between variables
        objs = [Dataset({'x': [2], 'y': 3}), Dataset({'x': [4], 'y': 5})]
        actual = concat(objs, 'x')
        expected = Dataset({'x': [2, 4], 'y': ('x', [3, 5])})
        self.assertDatasetIdentical(actual, expected)

        # mixed dims in coord variable
        objs = [
            Dataset({'x': [0]}, {'y': -1}),
            Dataset({'x': [1]}, {'y': ('x', [-2])})
        ]
        actual = concat(objs, 'x')
        expected = Dataset({'x': [0, 1]}, {'y': ('x', [-1, -2])})
        self.assertDatasetIdentical(actual, expected)

        # scalars with mixed lengths along concat dim -- values should repeat
        objs = [
            Dataset({'x': [0]}, {'y': -1}),
            Dataset({'x': [1, 2]}, {'y': -2})
        ]
        actual = concat(objs, 'x')
        expected = Dataset({}, {'y': ('x', [-1, -2, -2])})
        self.assertDatasetIdentical(actual, expected)

        # broadcast 1d x 1d -> 2d
        objs = [
            Dataset({'z': ('x', [-1])}, {
                'x': [0],
                'y': [0]
            }),
            Dataset({'z': ('y', [1])}, {
                'x': [1],
                'y': [0]
            })
        ]
        actual = concat(objs, 'x')
        expected = Dataset({'z': (('x', 'y'), [[-1], [1]])})
        self.assertDatasetIdentical(actual, expected)
Example #39
def binned_probability_plot(variable, bin_divs, ax=None, **kwdargs):
    """
    Creates a plot showing the binned probability of the data
    in variable.
    """
    ax, _ = utils.axis_figure(axis=ax)
    if variable.dims == ('time', ):
        variable = (xray.concat([variable], 'realization')
                    .transpose('time', 'realization'))
    assert variable.dims == ('time', 'realization')
    n_times, n_real = variable.shape
    # compute the binned probabilities
    probs = bin_probs(variable, bin_divs)
    # default to a blue colormap placed in the background
    kwdargs['cmap'] = kwdargs.get('cmap', plt.cm.get_cmap('Blues'))
    kwdargs['zorder'] = kwdargs.get('zorder', -100)
    # plot the probabilities
    y, x = np.meshgrid(np.arange(bin_divs.size),
                       np.arange(variable['time'].size + 1))
    pm = ax.pcolormesh(x, y, probs.values,
                       norm=plt.Normalize(vmin=0., vmax=1.),
                       **kwdargs)
    return pm
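The first branch above uses concat with a single-element list to add a new 'realization' dimension to a plain time series; the same promotion in isolation (independent of the plotting helpers):

import numpy as np
import xarray as xr

series = xr.DataArray(np.random.rand(5), dims='time')
promoted = xr.concat([series], dim='realization').transpose('time', 'realization')
assert promoted.dims == ('time', 'realization')
assert promoted.shape == (5, 1)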
Example #40
 def create_sync_delimited(self):
     # Currently this code assumes that the request only contained one stream. Otherwise code will break
     # in the future we may squash all of the differing streams into one result.
     if len(self.stream_param_map) > 1:
         raise StreamEngineException("Should not have more than one stream in request for delimited data", 500)
     datasets = []
     output_vars = []
     for sk, deployment, ds in self.stream_data.groups(self.stream_param_map.keys()[0]):
         ds, output_vars = self._fix_for_sync(ds, self.stream_param_map[sk])
         datasets.append(ds)
     final_data = xray.concat(datasets, dim='obs')
     final_data['obs'].values = numpy.arange(0, final_data['obs'].size, dtype=numpy.int32)
     key_vars = ['subsite', 'node', 'sensor', 'stream']
     # output columns in the correct order
     output_vars = output_vars[:4] + key_vars + output_vars[4:]
     final_data.attrs['subsite'] = datasets[0].attrs['subsite']
     final_data.attrs['node'] = datasets[0].attrs['node']
     final_data.attrs['sensor'] = datasets[0].attrs['sensor']
     final_data.attrs['stream'] = datasets[0].attrs['stream']
     with tempfile.NamedTemporaryFile() as tf:
         self._write_csv_out(tf.file, final_data, output_vars, set(key_vars))
         tf.seek(0)
         return tf.read()
Example #41
    def cen_diff(cls,
                 arr,
                 dim,
                 spacing=1,
                 is_coord=False,
                 do_edges_one_sided=False):
        """Centered differencing of the DataArray or Dataset.

        :param arr: Data to be center-differenced.
        :type arr: `xray.DataArray` or `xray.Dataset`
        :param str dim: Dimension over which to perform the differencing.
        :param int spacing: How many gridpoints over to use.  Size of resulting
                            array depends on this value.
        :param do_edges_one_sided: Whether or not to fill in the edge cells
                                   that don't have the needed neighbor cells
                                   for the stencil.  If `True`, use one-sided
                                   differencing with the same order of accuracy
                                   as `order`, and the outputted array is the
                                   same shape as `arr`.

                                   If `False`, the outputted array has a length
                                   in the computed axis reduced by `order`.
        """
        if spacing < 1:
            raise ValueError("Centered differencing cannot have spacing < 1")
        left = arr.isel(**{dim: slice(0, -spacing)})
        right = arr.isel(**{dim: slice(spacing, None)})
        # Centered differencing = sum of intermediate forward differences
        diff = (cls.fwd_diff1(right, dim, is_coord=is_coord) +
                cls.bwd_diff1(left, dim, is_coord=is_coord))
        if do_edges_one_sided:
            left = arr.isel(**{dim: slice(0, 2)})
            right = arr.isel(**{dim: slice(-2, None)})
            diff_left = cls.fwd_diff1(left, dim, is_coord=is_coord)
            diff_right = cls.bwd_diff1(right, dim, is_coord=is_coord)
            diff = xray.concat([diff_left, diff, diff_right], dim=dim)
        return diff
Example #42
    def fromshot(shotnum, los=None):
        """ Read the ECE LFS data from the specified shot """

        with tcv.shot(shotnum) as conn:
            try:
                frequency = conn.tdi(r'\results::ece_lfs:rf_freqs')
            except:  # FIXME: catch more specific exception
                frequency = Lfs.DEFAULT_FREQUENCIES
        type(frequency)
        if los:
            # remember that we use the los as index for channels
            los = np.atleast_1d(los) - 1
        else:
            los = np.arange(frequency.size)

        values = []
        used_los = []
        with tcv.shot(shotnum) as conn:
            for i, channel in enumerate(Lfs.channels(conn.shot)):
                if i in los:
                    values.append(conn.tdi(channel, dims='time'))
                    used_los.append(i + 1)

        data = xray.concat(values, dim='los')
        data.coords['los'] = used_los
        # TODO: add frequency coordinate

        # Normalize to mean value
        mean = data.where(data.time < 0).mean(dim='time')
        data = (data - mean) / mean

        # Fill-in data attributes
        with tcv.shot(shotnum) as conn:
            data.attrs['z_antenna'] = Lfs.zpos(conn)

        return data
Example #43
    def fromshot(shotnum, los=None):
        """ Read the ECE LFS data from the specified shot """

        with tcv.shot(shotnum) as conn:
            try:
                frequency = conn.tdi(r'\results::ece_lfs:rf_freqs')
            except:  # FIXME: catch more specific exception
                frequency = Lfs.DEFAULT_FREQUENCIES
        type(frequency)
        if los:
            # remember that we use the los as index for channels
            los = np.atleast_1d(los)-1
        else:
            los = np.arange(frequency.size)

        values = []
        used_los = []
        with tcv.shot(shotnum) as conn:
            for i, channel in enumerate(Lfs.channels(conn.shot)):
                if i in los:
                    values.append(conn.tdi(channel, dims='time'))
                    used_los.append(i+1)

        data = xray.concat(values, dim='los')
        data.coords['los'] = used_los
        # TODO: add frequency coordinate

        # Normalize to mean value
        mean = data.where(data.time < 0).mean(dim='time')
        data = (data - mean) / mean

        # Fill-in data attributes
        with tcv.shot(shotnum) as conn:
            data.attrs['z_antenna'] = Lfs.zpos(conn)

        return data
Example #44
    def test_concat_promote_shape(self):
        # mixed dims within variables
        objs = [Dataset({}, {'x': 0}), Dataset({'x': [1]})]
        actual = concat(objs, 'x')
        expected = Dataset({'x': [0, 1]})
        self.assertDatasetIdentical(actual, expected)

        objs = [Dataset({'x': [0]}), Dataset({}, {'x': 1})]
        actual = concat(objs, 'x')
        self.assertDatasetIdentical(actual, expected)

        # mixed dims between variables
        objs = [Dataset({'x': [2], 'y': 3}), Dataset({'x': [4], 'y': 5})]
        actual = concat(objs, 'x')
        expected = Dataset({'x': [2, 4], 'y': ('x', [3, 5])})
        self.assertDatasetIdentical(actual, expected)

        # mixed dims in coord variable
        objs = [Dataset({'x': [0]}, {'y': -1}),
                Dataset({'x': [1]}, {'y': ('x', [-2])})]
        actual = concat(objs, 'x')
        expected = Dataset({'x': [0, 1]}, {'y': ('x', [-1, -2])})
        self.assertDatasetIdentical(actual, expected)

        # scalars with mixed lengths along concat dim -- values should repeat
        objs = [Dataset({'x': [0]}, {'y': -1}),
                Dataset({'x': [1, 2]}, {'y': -2})]
        actual = concat(objs, 'x')
        expected = Dataset({}, {'y': ('x', [-1, -2, -2])})
        self.assertDatasetIdentical(actual, expected)

        # broadcast 1d x 1d -> 2d
        objs = [Dataset({'z': ('x', [-1])}, {'x': [0], 'y': [0]}),
                Dataset({'z': ('y', [1])}, {'x': [1], 'y': [0]})]
        actual = concat(objs, 'x')
        expected = Dataset({'z': (('x', 'y'), [[-1], [1]])})
        self.assertDatasetIdentical(actual, expected)
Example #45
 def test_concat_data_vars(self):
     data = Dataset({'foo': ('x', np.random.randn(10))})
     objs = [data.isel(x=slice(5)), data.isel(x=slice(5, None))]
     for data_vars in ['minimal', 'different', 'all', [], ['foo']]:
         actual = concat(objs, dim='x', data_vars=data_vars)
         self.assertDatasetIdentical(data, actual)
Example #46
def calculate(dbf,
              comps,
              phases,
              mode=None,
              output='GM',
              fake_points=False,
              **kwargs):
    """
    Sample the property surface of 'output' containing the specified
    components and phases. Model parameters are taken from 'dbf' and any
    state variables (T, P, etc.) can be specified as keyword arguments.

    Parameters
    ----------
    dbf : Database
        Thermodynamic database containing the relevant parameters.
    comps : str or sequence
        Names of components to consider in the calculation.
    phases : str or sequence
        Names of phases to consider in the calculation.
    mode : string, optional
        See 'make_callable' docstring for details.
    output : string, optional
        Model attribute to sample.
    fake_points : bool, optional (Default: False)
        If True, the first few points of the output surface will be fictitious
        points used to define an equilibrium hyperplane guaranteed to be above
        all the other points. This is used for convex hull computations.
    points : ndarray or a dict of phase names to ndarray, optional
        Columns of ndarrays must be internal degrees of freedom (site fractions), sorted.
        If this is not specified, points will be generated automatically.
    pdens : int, a dict of phase names to int, or a seq of both, optional
        Number of points to sample per degree of freedom.
    model : Model, a dict of phase names to Model, or a seq of both, optional
        Model class to use for each phase.

    Returns
    -------
    xray.Dataset of the sampled attribute as a function of state variables

    Examples
    --------
    None yet.
    """
    # Here we check for any keyword arguments that are special, i.e.,
    # there may be keyword arguments that aren't state variables
    pdens_dict = unpack_kwarg(kwargs.pop('pdens', 2000), default_arg=2000)
    points_dict = unpack_kwarg(kwargs.pop('points', None), default_arg=None)
    model_dict = unpack_kwarg(kwargs.pop('model', Model), default_arg=Model)
    callable_dict = unpack_kwarg(kwargs.pop('callables', None),
                                 default_arg=None)
    if isinstance(phases, str):
        phases = [phases]
    if isinstance(comps, str):
        comps = [comps]
    components = [x for x in sorted(comps) if not x.startswith('VA')]

    # Convert keyword strings to proper state variable objects
    # If we don't do this, sympy will get confused during substitution
    statevar_dict = collections.OrderedDict((v.StateVariable(key), unpack_condition(value)) \
                                            for (key, value) in sorted(kwargs.items()))
    str_statevar_dict = collections.OrderedDict((str(key), unpack_condition(value)) \
                                                for (key, value) in statevar_dict.items())
    all_phase_data = []
    comp_sets = {}
    largest_energy = -np.inf
    maximum_internal_dof = 0

    # Consider only the active phases
    active_phases = dict((name.upper(), dbf.phases[name.upper()]) \
        for name in unpack_phases(phases))

    for phase_name, phase_obj in sorted(active_phases.items()):
        # Build the symbolic representation of the energy
        mod = model_dict[phase_name]
        # if this is an object type, we need to construct it
        if isinstance(mod, type):
            try:
                model_dict[phase_name] = mod = mod(dbf, comps, phase_name)
            except DofError:
                # we can't build the specified phase because the
                # specified components aren't found in every sublattice
                # we'll just skip it
                logger.warning(
                    """Suspending specified phase %s due to
                some sublattices containing only unspecified components""",
                    phase_name)
                continue
        if points_dict[phase_name] is None:
            try:
                out = getattr(mod, output)
                maximum_internal_dof = max(maximum_internal_dof,
                                           len(out.atoms(v.SiteFraction)))
            except AttributeError:
                raise AttributeError(
                    'Missing Model attribute {0} specified for {1}'.format(
                        output, mod.__class__))
        else:
            maximum_internal_dof = max(
                maximum_internal_dof,
                np.asarray(points_dict[phase_name]).shape[-1])

    for phase_name, phase_obj in sorted(active_phases.items()):
        try:
            mod = model_dict[phase_name]
        except KeyError:
            continue
        # Construct an ordered list of the variables
        variables, sublattice_dof = generate_dof(phase_obj, mod.components)

        # Build the "fast" representation of that model
        if callable_dict[phase_name] is None:
            out = getattr(mod, output)
            # As a last resort, treat undefined symbols as zero
            # But warn the user when we do this
            # This is consistent with TC's behavior
            undefs = list(out.atoms(Symbol) - out.atoms(v.StateVariable))
            for undef in undefs:
                out = out.xreplace({undef: float(0)})
                logger.warning(
                    'Setting undefined symbol %s for phase %s to zero', undef,
                    phase_name)
            comp_sets[phase_name] = make_callable(out, \
                list(statevar_dict.keys()) + variables, mode=mode)
        else:
            comp_sets[phase_name] = callable_dict[phase_name]

        points = points_dict[phase_name]
        if points is None:
            # Eliminate pure vacancy endmembers from the calculation
            vacancy_indices = list()
            for idx, sublattice in enumerate(phase_obj.constituents):
                active_in_subl = sorted(
                    set(phase_obj.constituents[idx]).intersection(comps))
                if 'VA' in active_in_subl and 'VA' in sorted(comps):
                    vacancy_indices.append(active_in_subl.index('VA'))
            if len(vacancy_indices) != len(phase_obj.constituents):
                vacancy_indices = None
            logger.debug('vacancy_indices: %s', vacancy_indices)
            # Add all endmembers to guarantee their presence
            points = endmember_matrix(sublattice_dof,
                                      vacancy_indices=vacancy_indices)

            # Sample composition space for more points
            if sum(sublattice_dof) > len(sublattice_dof):
                points = np.concatenate(
                    (points,
                     point_sample(sublattice_dof,
                                  pdof=pdens_dict[phase_name])))

            # If there are nontrivial sublattices with vacancies in them,
            # generate a set of points where their fraction is zero and renormalize
            for idx, sublattice in enumerate(phase_obj.constituents):
                if 'VA' in set(sublattice) and len(sublattice) > 1:
                    var_idx = variables.index(
                        v.SiteFraction(phase_name, idx, 'VA'))
                    addtl_pts = np.copy(points)
                    # set vacancy fraction to log-spaced between 1e-10 and 1e-6
                    addtl_pts[:, var_idx] = np.power(
                        10.0, -10.0 * (1.0 - addtl_pts[:, var_idx]))
                    # renormalize site fractions
                    cur_idx = 0
                    for ctx in sublattice_dof:
                        end_idx = cur_idx + ctx
                        addtl_pts[:, cur_idx:end_idx] /= \
                            addtl_pts[:, cur_idx:end_idx].sum(axis=1)[:, None]
                        cur_idx = end_idx
                    # add to points matrix
                    points = np.concatenate((points, addtl_pts), axis=0)
            # Filter out NaNs that may have slipped in if we sampled too high a vacancy concentration
            # Issues with this appear to be platform-dependent
            points = points[~np.isnan(points).any(axis=-1)]
        # Ensure that points has the correct dimensions and dtype
        points = np.atleast_2d(np.asarray(points, dtype=np.float))

        phase_ds = _compute_phase_values(phase_obj, components, variables,
                                         str_statevar_dict, points,
                                         comp_sets[phase_name], output,
                                         maximum_internal_dof)
        # largest_energy is really only relevant if fake_points is set
        if fake_points:
            largest_energy = max(phase_ds[output].max(), largest_energy)
        all_phase_data.append(phase_ds)

    if fake_points:
        if output != 'GM':
            raise ValueError(
                'fake_points=True should only be used with output=\'GM\'')
        phase_ds = _generate_fake_points(components, statevar_dict,
                                         largest_energy, output,
                                         maximum_internal_dof)
        final_ds = xray.concat(itertools.chain([phase_ds], all_phase_data),
                               dim='points')
    else:
        # speedup for single-phase case (found by profiling)
        if len(all_phase_data) > 1:
            final_ds = xray.concat(all_phase_data, dim='points')
        else:
            final_ds = all_phase_data[0]

    # Reset the points dimension to use a single global index, unless the
    # single-phase shortcut above already left it intact
    if fake_points or len(all_phase_data) != 1:
        final_ds['points'] = np.arange(len(final_ds.points))
    return final_ds
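The last few lines above lean on two xray details worth isolating: concatenating the per-phase datasets along a shared 'points' dimension, and then resetting that dimension so the coordinate does not carry duplicated per-phase indices. A minimal sketch of that behaviour (toy data; on a current install one would import xarray as xray):

import numpy as np
import xray  # on current installs: import xarray as xray

# two toy per-phase datasets, each with its own local 'points' index
phase_a = xray.Dataset({'GM': ('points', [-1.0, -2.0])},
                       coords={'points': np.arange(2)})
phase_b = xray.Dataset({'GM': ('points', [-3.0])},
                       coords={'points': np.arange(1)})

combined = xray.concat([phase_a, phase_b], dim='points')
# combined['points'].values is now [0, 1, 0] -- duplicated labels
combined['points'] = np.arange(len(combined.points))
# 'points' now runs 0..2 as a single global index, as in the routine above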
ens5 = ens5.mean(dim='lon')



timerange = range(31,39)
#timerange = range(35,47)
#timerange = range(0,59)

O3_c=getattr(ensc,var).sel(lat=p)
O3_1=getattr(ens1,var).sel(lat=p)
O3_2=getattr(ens2,var).sel(lat=p)
O3_3=getattr(ens3,var).sel(lat=p)
O3_4=getattr(ens4,var).sel(lat=p)
O3_5=getattr(ens5,var).sel(lat=p)
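# Presumably the (shorter) control run is repeated five times along 'time'
# below so that the same isel(time=timerange) selection is valid for it too.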

O3_c=xray.concat([O3_c,O3_c,O3_c,O3_c,O3_c],dim='time')

O3_c=O3_c.isel(time=timerange)
O3_1=O3_1.isel(time=timerange)
O3_2=O3_2.isel(time=timerange)
O3_3=O3_3.isel(time=timerange)
O3_4=O3_4.isel(time=timerange)
O3_5=O3_5.isel(time=timerange)

if 'BR' in var:
    O3_c=O3_c*1e9
    O3_1=O3_1*1e9
    O3_2=O3_2*1e9
    O3_3=O3_3*1e9
    O3_4=O3_4*1e9
    O3_5=O3_5*1e9
Ejemplo n.º 48
0
                                dims=['channels', 'events', 'frequency', 'time']
                                )

    bp_sess_1 = session_data.values[1, 0, :] - session_data.values[2, 0, :]
    print bp_sess_1

    pow_sess_new_1 = phase_pow_multi(freqs, bp_sess_1, to_return='power', samplerates=ev_data.attrs['samplerate'])

    print pow_sess_new_1

    pow_xray_1 = xray.DataArray(pow_sess_new_1.reshape(1, 1, pow_sess_new_1.shape[0], pow_sess_new_1.shape[1]),
                                coords=[['003_004'], np.arange(1), freqs, session_data['time']],
                                dims=['channels', 'events', 'frequency', 'time']
                                )

    pow_combined = xray.concat([pow_xray_0, pow_xray_1], dim='channels')

    pow_combined.attrs['samplerate'] = ev_data.attrs['samplerate']

    edcw = EventDataChopper(events=base_events, event_duration=1.6, buffer=1.0,
                            data_dict={base_events[0].eegfile: pow_combined})

    chopped_wavelets = edcw.filter()

    print

    # class EventDataChopper(PropertiedObject):
    #     _descriptors = [
    #
    #         TypeValTuple('time_shift', float, 0.0),
    #         TypeValTuple('event_duration', float, 0.0),
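The core move in this example is stacking two single-channel wavelet-power arrays into one DataArray along the existing 'channels' dimension. A reduced, self-contained sketch of that pattern (toy shapes and hypothetical channel labels; xray is the pre-rename xarray):

import numpy as np
import xray  # or: import xarray as xray

freqs = np.array([3.0, 5.0, 8.0])
times = np.arange(4)

def one_channel(label):
    # build a (1, 1, frequency, time) array for a single channel
    data = np.random.rand(1, 1, len(freqs), len(times))
    return xray.DataArray(data,
                          coords=[[label], np.arange(1), freqs, times],
                          dims=['channels', 'events', 'frequency', 'time'])

pow_combined = xray.concat([one_channel('ch0'), one_channel('ch1')],
                           dim='channels')
# pow_combined has shape (2, 1, 3, 4); attrs such as 'samplerate' can be
# copied onto the result afterwards, exactly as the example does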
Ejemplo n.º 49
0
#CREM.parameters()

# Temporary container for read-in data
arrays = {}

def label(variable, desc, unit_long, unit_short):
    """Add some descriptive attributes to an xray.DataArray."""
    arrays[variable].attrs.update({'desc': desc, 'unit_long': unit_long,
                                   'unit_short': unit_short})


# Cell:

# GDP
temp = [raw[case].extract('gdp_ref') for case in cases]
arrays['GDP'] = (xray.concat(temp, dim=cases)
                 .sel(rs=CREM.set('r'))
                 .rename({'rs': 'r'}))
label('GDP', 'Gross domestic product',
      'billions of U.S. dollars, constant at 2007', '10⁹ USD')

arrays['GDP_aagr'] = ((arrays['GDP'][:,:,1:].values / arrays['GDP'][:,:,:-1])
                      ** (1 / CREM.extract('lp')) - 1) * 100
label('GDP_aagr', 'Gross domestic product, average annual growth rate',
      'percent', '%')

arrays['GDP_delta'] = (arrays['GDP'] / arrays['GDP'].sel(case='bau') - 1) * 100
label('GDP_delta', 'Change in gross domestic product relative to BAU',
      'percent', '%')
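The GDP block above passes cases directly as the dim argument, which makes concat create a brand-new dimension whose name and labels are taken from it; the later .sel(case='bau') suggests cases is a pandas.Index named 'case'. A hedged, self-contained sketch of that idiom (toy data, assumed index structure):

import pandas as pd
import xray  # or: import xarray as xray

cases = pd.Index(['bau', 'policy'], name='case')   # assumed to mirror the notebook
temp = [xray.Dataset({'gdp_ref': ('r', [1.0, 2.0, 3.0])},
                     coords={'r': ['north', 'east', 'west']})
        for _ in cases]

gdp = xray.concat(temp, dim=cases)
# gdp has dims ('case', 'r'); gdp.sel(case='bau') picks out the first element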


# Cell:
Ejemplo n.º 50
0
        event_time_axis = np.linspace(-1.0, 2.6, len(selector_array))

        ev_array = ts[:, :, selector_array]

        ev_array['time'] = event_time_axis
        ev_array['events'] = [i]

        ev_data_list.append(ev_array)
        # ev_data_list.append(ts[:,:,selector_array].values)

        print i
        # print ev_array
        if i == 2:
            break

    eventdata = xray.concat(ev_data_list, dim='events')

    # eventdata =np.concatenate(ev_data_list,axis=1)

    # eventdata = xray.concat(ev_data_list,dim='events')

    print eventdata

    # eegoffset_time_array = ts['time'].values['eegoffset']
    #
    # ev_data_list = []
    # for i, ev  in enumerate(base_events_0):
    #     print ev.eegoffset
    #     start_offset = ev.eegoffset-int(np.ceil(buffer*samplerate))
    #     end_offset = ev.eegoffset+int(np.ceil((ev_duration+buffer)*samplerate))
    #     print "start_offset,end_offset, size=",start_offset,end_offset,end_offset-start_offset
Ejemplo n.º 51
0
    def read_all(self, channels, start_offset, end_offset, buffer):
        evs = self.events

        raw_bin_wrappers, original_eeg_files = self.__create_bin_readers()

        # we need to create rawbinwrappers first to figure out sample rate before calling __compute_time_series_length()
        time_series_length = self.__compute_time_series_length()

        time_series_data = np.empty(
            (len(channels), len(evs), time_series_length),
            dtype=np.float) * np.nan

        events = []

        newdat_list = []

        # for s,src in enumerate(usources):
        for s, (src,
                eegfile) in enumerate(zip(raw_bin_wrappers,
                                          original_eeg_files)):
            ind = np.atleast_1d(evs.eegfile == eegfile)

            if len(ind) == 1:
                events.append(evs[0])
            else:
                events.append(evs[ind])

            # print event_offsets
            #print "Loading %d events from %s" % (ind.sum(),src)
            # get the timeseries for those events
            newdat = src.get_event_data_xray_simple(channels=channels,
                                                    events=events,
                                                    start_offset=start_offset,
                                                    end_offset=end_offset,
                                                    buffer=buffer)

            newdat_list.append(newdat)

        start_extend_time = time.time()
        #new code
        eventdata = xray.concat(newdat_list, dim='events')
        end_extend_time = time.time()

        # concatenate (must eventually check that dims match)
        # ORIGINAL CODE
        tdim = eventdata['time']
        cdim = eventdata['channels']
        # srate = eventdata.samplerate
        srate = eventdata.attrs['samplerate']

        eventdata_xray = eventdata
        # eventdata_xray = xray.DataArray(np.squeeze(eventdata.values), coords=[cdim,tdim], dims=['channels','time'])
        # eventdata_xray.attrs['samplerate'] = eventdata.attrs['samplerate']

        if not self.keep_buffer:
            # trimming buffer data samples
            number_of_buffer_samples = self.get_number_of_samples_for_interval(
                self.buffer_time)
            if number_of_buffer_samples > 0:
                eventdata_xray = eventdata_xray[:, :, number_of_buffer_samples:
                                                -number_of_buffer_samples]

        return eventdata_xray
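After the concat along 'events', the buffer is stripped with bare positional slicing on the last axis. An equivalent that does not depend on axis order is isel on 'time'; a small standalone sketch with a hypothetical buffer length:

import numpy as np
import xray  # or: import xarray as xray

arr = xray.DataArray(np.arange(24).reshape(2, 3, 4),
                     dims=['channels', 'events', 'time'])
n_buf = 1  # hypothetical number of buffer samples per side
trimmed = arr.isel(time=slice(n_buf, -n_buf))  # same as arr[:, :, n_buf:-n_buf]
# trimmed has shape (2, 3, 2)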
Ejemplo n.º 52
0
    def read(self, channels):
        evs = self.events

        raw_bin_wrappers, original_eeg_files = self.__create_bin_readers()

        # we need to create rawbinwrappers first to figure out sample rate before calling __compute_time_series_length()
        time_series_length = self.__compute_time_series_length()

        time_series_data = np.empty(
            (len(channels), len(evs), time_series_length),
            dtype=np.float) * np.nan

        # usources = np.unique(raw_bin_wrappers)

        ordered_indices = np.arange(len(evs))

        event_indices_list = []

        events = []

        newdat_list = []

        eventdata = None
        # for s,src in enumerate(usources):
        for s, (src,
                eegfile) in enumerate(zip(raw_bin_wrappers,
                                          original_eeg_files)):
            ind = np.atleast_1d(evs.eegfile == eegfile)

            event_indices_list.append(ordered_indices[ind])

            # if verbose:
            #     if not s%10:
            #         print 'Reading event %d'%s
            if len(ind) == 1:
                event_offsets = evs['eegoffset']
                events.append(evs)
            else:
                event_offsets = evs[ind]['eegoffset']
                events.append(evs[ind])

            # print event_offsets
            #print "Loading %d events from %s" % (ind.sum(),src)
            # get the timeseries for those events
            newdat = src.get_event_data_xray(channels,
                                             event_offsets,
                                             self.start_time,
                                             self.end_time,
                                             self.buffer_time,
                                             resampled_rate=None,
                                             filt_freq=None,
                                             filt_type=None,
                                             filt_order=None,
                                             keep_buffer=self.keep_buffer,
                                             loop_axis=None,
                                             num_mp_procs=0,
                                             eoffset='eegoffset',
                                             eoffset_in_time=False)

            newdat_list.append(newdat)

        event_indices_array = np.hstack(event_indices_list)

        event_indices_restore_sort_order_array = event_indices_array.argsort()

        start_extend_time = time.time()
        #new code
        eventdata = xray.concat(newdat_list, dim='events')
        end_extend_time = time.time()

        # concatenate (must eventually check that dims match)
        # ORIGINAL CODE
        tdim = eventdata['time']
        cdim = eventdata['channels']
        # srate = eventdata.samplerate
        srate = eventdata.attrs['samplerate']
        events = np.concatenate(events).view(Events)

        eventdata_xray = xray.DataArray(eventdata.values,
                                        coords=[cdim, events, tdim],
                                        dims=['channels', 'events', 'time'])
        eventdata_xray.attrs['samplerate'] = eventdata.attrs['samplerate']

        eventdata_xray = eventdata_xray[:,
                                        event_indices_restore_sort_order_array, :]  #### RESTORE THIS

        if not self.keep_buffer:
            # trimming buffer data samples
            number_of_buffer_samples = self.get_number_of_samples_for_interval(
                self.buffer_time)
            if number_of_buffer_samples > 0:
                eventdata_xray = eventdata_xray[:, :, number_of_buffer_samples:
                                                -number_of_buffer_samples]

        return TimeSeriesX(eventdata_xray)
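Because events are pulled out file by file before the concat, the combined 'events' axis is no longer in the original order; the argsort of the stacked original indices is what restores it. A tiny standalone illustration of the same trick:

import numpy as np

ordered_indices = np.arange(6)                      # original event positions
groups = [ordered_indices[[0, 3, 5]],               # events grouped file by file
          ordered_indices[[1, 2, 4]]]
stacked = np.hstack(groups)                         # [0 3 5 1 2 4]
restore = stacked.argsort()                         # indices that re-sort to 0..5
values = np.array(['a', 'd', 'f', 'b', 'c', 'e'])   # data concatenated file by file
print(values[restore])                              # ['a' 'b' 'c' 'd' 'e' 'f']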
Ejemplo n.º 53
0
    def test_concat_errors(self):
        data = create_test_data()
        split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))]

        with self.assertRaisesRegexp(ValueError, 'must supply at least one'):
            concat([], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'are not coordinates'):
            concat([data, data], 'new_dim', coords=['not_found'])

        with self.assertRaisesRegexp(ValueError, 'global attributes not'):
            data0, data1 = deepcopy(split_data)
            data1.attrs['foo'] = 'bar'
            concat([data0, data1], 'dim1', compat='identical')
        self.assertDatasetIdentical(
            data, concat([data0, data1], 'dim1', compat='equals'))

        with self.assertRaisesRegexp(ValueError, 'encountered unexpected'):
            data0, data1 = deepcopy(split_data)
            data1['foo'] = ('bar', np.random.randn(10))
            concat([data0, data1], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
            data0, data1 = deepcopy(split_data)
            data1['dim2'] = 2 * data1['dim2']
            concat([data0, data1], 'dim1', coords='minimal')

        with self.assertRaisesRegexp(ValueError, 'it is not 1-dimensional'):
            concat([data0, data1], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'compat.* invalid'):
            concat(split_data, 'dim1', compat='foobar')

        with self.assertRaisesRegexp(ValueError, 'unexpected value for'):
            concat([data, data], 'new_dim', coords='foobar')

        with self.assertRaisesRegexp(
                ValueError, 'coordinate in some datasets but not others'):
            concat([Dataset({'x': 0}), Dataset({'x': [1]})], dim='z')

        with self.assertRaisesRegexp(
                ValueError, 'coordinate in some datasets but not others'):
            concat([Dataset({'x': 0}), Dataset({}, {'x': 1})], dim='z')

        with self.assertRaisesRegexp(ValueError, 'no longer a valid'):
            concat([data, data], 'new_dim', mode='different')
        with self.assertRaisesRegexp(ValueError, 'no longer a valid'):
            concat([data, data], 'new_dim', concat_over='different')
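One distinction these tests exercise is easy to miss: compat='identical' also compares global attributes, while compat='equals' ignores them. A minimal sketch of the difference (same Dataset/concat names as in the tests):

from xray import Dataset, concat  # or: from xarray import Dataset, concat

a = Dataset({'foo': ('x', [1, 2])})
b = Dataset({'foo': ('x', [3, 4])})
b.attrs['note'] = 'only b carries this attribute'

combined = concat([a, b], 'x', compat='equals')  # fine: attrs are not compared
try:
    concat([a, b], 'x', compat='identical')      # global attributes differ
except ValueError as err:
    print(err)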
Ejemplo n.º 54
0
    def test_concat_dim_is_variable(self):
        objs = [Dataset({'x': 0}), Dataset({'x': 1})]
        coord = Variable('y', [3, 4])
        expected = Dataset({'x': ('y', [0, 1]), 'y': [3, 4]})
        actual = concat(objs, coord)
        self.assertDatasetIdentical(actual, expected)
Ejemplo n.º 55
0
    def roll(self, array, n, dim):
        """Clone of numpy.roll for xray DataArrays."""
        left = array.isel(**{dim: slice(None, -n)})
        right = array.isel(**{dim: slice(-n, None)})
        return xray.concat([right, left], dim=dim)
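A quick check of what this helper does, written as a standalone function (no self) and assuming n is positive and smaller than the length of dim:

import numpy as np
import xray  # or: import xarray as xray

def roll(array, n, dim):
    """Clone of numpy.roll for xray DataArrays."""
    left = array.isel(**{dim: slice(None, -n)})
    right = array.isel(**{dim: slice(-n, None)})
    return xray.concat([right, left], dim=dim)

arr = xray.DataArray(np.array([1, 2, 3, 4, 5]), dims=['x'])
print(roll(arr, 2, 'x').values)  # [4 5 1 2 3], matching np.roll([1, 2, 3, 4, 5], 2)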
Ejemplo n.º 56
0
'''
NAME
    ECMWF PyToolBox - Merging two or more NetCDF4 files
PURPOSE
    This script merges two or more NetCDF4 files and exports a single NetCDF4 file containing the combined dataset
PROGRAMMER(S)
    Shayan Davarzani ([email protected]) [Master of Engineering - Civil Engineering]
REFERENCES
    Institute of Earth Sciences Coders -- https://iescoders.com/2017/10/03/reading-netcdf4-data-in-python/
    Dr. Ali Asghar Golshani -- my best teacher -- https://ir.linkedin.com/in/aliasghar-golshani-57a78414/
    IA University Central Tehran Branch -- https://www.iau.ac.ir/
'''
import xray

urls = [r"tez\persian-gulf-1979-wave.nc", r"tez\persian-gulf-1980-1981-wave.nc"]  # input files for merging (raw strings so the backslashes are kept literally)
datasets = [xray.open_dataset(url) for url in urls]
merged = xray.concat(datasets, 'forecast_time')
merged.to_netcdf('all-data.nc')
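For the common case of stitching files along an existing record dimension, later xray releases (and today's xarray) also provide open_mfdataset, which opens and concatenates in one call; it needs dask installed, and newer xarray additionally wants combine='nested' next to concat_dim. A hedged sketch with the same file names:

import xray  # or: import xarray as xray

merged = xray.open_mfdataset([r"tez\persian-gulf-1979-wave.nc",
                              r"tez\persian-gulf-1980-1981-wave.nc"],
                             concat_dim='forecast_time')  # on modern xarray: add combine='nested'
merged.to_netcdf('all-data.nc')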
Ejemplo n.º 57
0
            # set up some vars for the output naming standardization
            cmor_table = os.path.splitext(
                os.path.basename(fn))[0].split('_')[1]
            experiment = scenario = os.path.splitext(
                os.path.basename(fn))[0].split('_')[-2]
            scenario = os.path.splitext(os.path.basename(fn))[0].split('_')[-3]

            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            # run the concatenation and the output to a new netcdf file
            # --> and we are writing in a hack to get around the darn issue with GFDL-CM3
            #   we could just run them all with the reduce workaround, but I will keep both
            #   in hopes that the library improves.
            if 'GFDL' in model:
                ds = reduce(lambda x, y: xray.concat([x, y], 'time'),
                            (xray.open_dataset(i) for i in files))
            else:
                ds = xray.concat([xray.open_dataset(i).load() for i in files],
                                 'time')

            new_ds = year_greater_yearlimit_workaround(
                ds, int(begin_year_fnout[:4]), int(end_year_fnout[:4]),
                int(str(begin_year_in)[:4]), int(str(end_year_in)[:4]))
            begin_year_fnout = str(
                int(begin_year_fnout[:4]) +
                (int(begin_year_in[:4]) - int(begin_year_fnout[:4]))
            ) + '01'  # to update the output naming
            # output name generation
            new_fn_base = '_'.join([
                variable, cmor_table, model, scenario, experiment,