# Assumes: import numpy as np; from astropy.table import Column, TableColumns
def test_init(self):
    """Test initialisation with lists, tuples, dicts of arrays rather than
    Columns [regression test for #2647]."""
    x1 = np.arange(10.)
    x2 = np.arange(5.)
    x3 = np.arange(7.)

    col_list = [('x1', x1), ('x2', x2), ('x3', x3)]
    tc_list = TableColumns(col_list)
    for col in col_list:
        assert col[0] in tc_list
        assert tc_list[col[0]] is col[1]

    col_tuple = (('x1', x1), ('x2', x2), ('x3', x3))
    tc_tuple = TableColumns(col_tuple)
    for col in col_tuple:
        assert col[0] in tc_tuple
        assert tc_tuple[col[0]] is col[1]

    col_dict = dict([('x1', x1), ('x2', x2), ('x3', x3)])
    tc_dict = TableColumns(col_dict)
    for col in tc_dict.keys():
        assert col in tc_dict
        assert tc_dict[col] is col_dict[col]

    columns = [Column(col[1], name=col[0]) for col in col_list]
    tc = TableColumns(columns)
    for col in columns:
        assert col.name in tc
        assert tc[col.name] is col
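# A standalone sketch of what the regression test above exercises:
# TableColumns (an ordered mapping from astropy.table) accepts (name, array)
# pairs, dicts, or Column objects, and stores the values as given, keyed by
# name. This assumes only numpy and astropy are installed.
#
# import numpy as np
# from astropy.table import Column, TableColumns
#
# tc = TableColumns([('x1', np.arange(3.)), ('x2', np.arange(5.))])
# assert 'x1' in tc                 # mapping-style membership
# print(tc['x2'])                   # values are stored as-is, keyed by name
#
# tc2 = TableColumns([Column(np.arange(4.), name='y1')])
# assert tc2['y1'].name == 'y1'     # Column objects are keyed by their .name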
# Imports assumed by this module (Utils is a package-local helper module that
# provides the Series.Getaxispos_* routines used below):
import os
from copy import deepcopy

import numpy as np
import scipy.interpolate
import six
from astropy.table import Column, Table, TableColumns


class AtmoGrid(Column):
    r"""
    Define the base atmosphere grid structure.

    AtmoGrid contains utilities to trim the grid and to read/write it in
    HDF5 format.

    Parameters
    ----------
    data : ndarray
        Grid of log(flux) values (e-base)
    name : str
        Keyword name of the atmosphere grid
    dtype : np.dtype compatible value
        Data type of the flux grid
    shape : tuple or ()
        Dimensions of a single row element in the flux grid
    length : int or 0
        Number of row elements in the grid
    description : str or None
        Full description of the atmosphere grid
    unit : str or None
        Physical unit
    format : str or None or function or callable
        Format string for outputting column values. This can be an
        "old-style" (``format % value``) or "new-style" (`str.format`)
        format specification string or a function or any callable object
        that accepts a single value and returns a string.
    meta : dict-like or None
        Meta-data associated with the atmosphere grid
    cols : OrderedDict-like, list of Columns, list of lists/tuples
        Full definition of the flux grid axes. This can be a list of entries
        ('colname', ndarray) with the ndarray corresponding to the axis
        values, or a list of Columns containing this information.

    Examples
    --------
    An AtmoGrid can be created like this::

        logtemp = np.log(np.arange(3000., 10001., 250.))
        logg = np.arange(2.0, 5.6, 0.5)
        mu = np.arange(0., 1.01, 0.02)
        logflux = np.random.normal(size=(logtemp.size, logg.size, mu.size))
        atmo = AtmoGrid(data=logflux,
                        cols=[('logtemp', logtemp), ('logg', logg), ('mu', mu)])

    To read/save a file::

        atmo = AtmoGridPhot.ReadHDF5('vband.h5')
        atmo.WriteHDF5('vband_new.h5')

    Notes
    -----
    In principle the axes and the data could be in any format. However, we
    recommend using log(flux) and log(temperature) because linear
    interpolation of such a grid makes more sense (consider, say, the
    blackbody relation :math:`F \propto \sigma T^4`).
    """

    def __new__(cls, data=None, name=None, dtype=None, shape=(), length=0,
                description=None, unit=None, format=None, meta=None, cols=None):
        self = super(AtmoGrid, cls).__new__(
            cls, data=data, name=name, dtype=dtype, shape=shape, length=length,
            description=description, unit=unit, format=format, meta=meta)

        if cols is None:
            # Default axes: plain indices along each dimension.
            self.cols = TableColumns([
                Column(name=str(i), data=np.arange(self.shape[i], dtype=float))
                for i in range(self.ndim)])
        else:
            if len(cols) != self.ndim:
                raise ValueError('cols must contain a number of elements equal '
                                 'to the dimension of the data grid.')
            if isinstance(cols, TableColumns):
                self.cols = cols
            else:
                try:
                    self.cols = TableColumns([
                        Column(name=col[0], data=col[1])
                        if isinstance(col, (list, tuple)) else col
                        for col in cols])
                except Exception:
                    raise ValueError('Cannot make a TableColumns out of the '
                                     'provided cols parameter.')
            shape = tuple(col.size for col in self.cols.values())
            if self.shape != shape:
                raise ValueError('The dimensions of the data grid and of the '
                                 'cols do not match.')
        return self

    def __copy__(self):
        return self.copy(copy_data=False)

    def __deepcopy__(self, memo=None):
        return self.copy(copy_data=True)

    def __getitem__(self, item):
        if isinstance(item, six.string_types):
            if item not in self.colnames:
                # Allow retrieving the linear version of a logarithmic axis.
                if 'log' + item in self.colnames:
                    return np.exp(self.cols['log' + item])
                elif 'log' + item[2:] in self.colnames:
                    return 10**(self.cols['log' + item[2:]])
                elif 'log10' + item in self.colnames:
                    return 10**(self.cols['log10' + item])
                else:
                    raise Exception('The provided column name cannot be found.')
            else:
                return self.cols[item]
        else:
            return self.view(np.ndarray)[item]

    @property
    def colnames(self):
        return list(self.cols.keys())

    def copy(self, order='C', data=None, copy_data=True):
        """
        Copy of the instance. If ``data`` is supplied then a view (reference)
        of ``data`` is used, and ``copy_data`` is ignored.
        """
        if data is None:
            data = self.view(np.ndarray)
            if copy_data:
                data = data.copy(order)
        return self.__class__(name=self.name, data=data, unit=self.unit,
                              format=self.format, description=self.description,
                              meta=deepcopy(self.meta), cols=self.cols)

    def Fill_nan_old(self, axis=0, method='spline', bounds_error=False,
                     fill_value=np.nan, k=1, s=1):
        """
        Fill the empty grid cells (marked as np.nan) with values interpolated
        along a given axis (i.e. the interpolation is done in 1D).

        Parameters
        ----------
        axis : int
            Axis along which the interpolation should be performed.
        method : str
            Interpolation method to use. Possible choices are 'spline' and
            'interp1d'.
            'spline' allows for the use of the optional keywords k (the order)
            and s (the smoothing parameter). See scipy.interpolate.splrep.
            'interp1d' allows for the use of the optional keywords
            bounds_error and fill_value. See scipy.interpolate.interp1d.
        bounds_error : bool
            Whether to raise an error when attempting to extrapolate out of
            bounds. Only works with 'interp1d'.
        fill_value : float
            Value to use when bounds_error is False. Only works with
            'interp1d'.
        k : int
            Order of the spline to use. We recommend 1. Only works with
            'spline'.
        s : int
            Smoothing parameter for the spline. We recommend 0 (exact
            interpolation), or 1. Only works with 'spline'.

        Examples
        --------
        ::

            atmo.Fill_nan(axis=0, method='interp1d', bounds_error=False, fill_value=np.nan)

        This would fill in the values that are not out of bounds with a
        linear fit. Values out of bounds would be set to np.nan.

        ::

            atmo.Fill_nan(axis=0, method='spline', k=1, s=0)

        This would produce exactly the same interpolation as above, except
        that values out of bounds would be extrapolated.

        Notes
        -----
        From our experience, it is best to first fill the values within the
        bounds using 'interp1d' with bounds_error=False and fill_value=np.nan,
        and then use 'spline' with k=1 and s=1 in order to extrapolate outside
        the bounds.

        To interpolate within the bounds, the temperature axis (i.e. 0) is
        generally best and smoother, whereas the logg axis (i.e. 1) works
        better to extrapolate outside::

            atmo.Fill_nan(axis=0, method='interp1d', bounds_error=False, fill_value=np.nan)
            atmo.Fill_nan(axis=1, method='spline', k=1, s=1)
        """
        if method not in ['interp1d', 'spline']:
            raise Exception("Wrong method input! Must be either 'interp1d' or 'spline'.")
        ndim = list(self.shape)
        ndim.pop(axis)
        inds_tmp = np.indices(ndim)
        inds = [ind.flatten() for ind in inds_tmp]
        niter = len(inds[0])
        inds.insert(axis, [slice(None)] * niter)
        for ind in zip(*inds):
            col = self[ind]
            inds_good = np.isfinite(col)
            inds_bad = ~inds_good
            if np.any(inds_bad):
                if method == 'interp1d':
                    interpolator = scipy.interpolate.interp1d(
                        self.cols[axis][inds_good], col[inds_good],
                        assume_sorted=True, bounds_error=bounds_error,
                        fill_value=fill_value)
                    col[inds_bad] = interpolator(self.cols[axis][inds_bad])
                elif method == 'spline':
                    tck = scipy.interpolate.splrep(
                        self.cols[axis][inds_good], col[inds_good], k=k, s=s)
                    col[inds_bad] = scipy.interpolate.splev(
                        self.cols[axis][inds_bad], tck)

    def Fill_nan(self, axis=0, inds_fill=None, method='spline',
                 bounds_error=False, fill_value=np.nan, k=1, s=0,
                 extrapolate=True):
        """
        Fill the empty grid cells (marked as np.nan) with values interpolated
        along a given axis (i.e. the interpolation is done in 1D).

        Parameters
        ----------
        axis : int
            Axis along which the interpolation should be performed.
        inds_fill : tuple(ndarray)
            Tuple/list of index arrays selecting the pixels to fill.
        method : str
            Interpolation method to use. Possible choices are 'spline',
            'interp1d' and 'pchip'.
            'spline' allows for the use of the optional keywords k (the order)
            and s (the smoothing parameter). See scipy.interpolate.splrep.
            'interp1d' allows for the use of the optional keywords
            bounds_error and fill_value. See scipy.interpolate.interp1d.
            'pchip' allows extrapolating out of bounds, or setting NaNs. See
            scipy.interpolate.PchipInterpolator.
        bounds_error : bool
            Whether to raise an error when attempting to extrapolate out of
            bounds. Only works with 'interp1d'.
        fill_value : float
            Value to use when bounds_error is False. Only works with
            'interp1d'.
        k : int
            Order of the spline to use. We recommend 1. Only works with
            'spline'.
        s : int
            Smoothing parameter for the spline. We recommend 0 (exact
            interpolation). Only works with 'spline'.
        extrapolate : bool
            Whether to extrapolate out of bounds or set NaNs. Only works with
            'pchip'.

        Examples
        --------
        ::

            atmo.Fill_nan(axis=0, method='interp1d', bounds_error=False, fill_value=np.nan)

        This would fill in the values that are not out of bounds with a
        linear fit. Values out of bounds would be set to np.nan.

        ::

            atmo.Fill_nan(axis=0, method='spline', k=1, s=0)

        This would produce exactly the same interpolation as above, except
        that values out of bounds would be extrapolated.

        Notes
        -----
        From our experience, it is best to first fill the values within the
        bounds using 'interp1d' with bounds_error=False and fill_value=np.nan,
        and then use 'spline' with k=1 and s=1 in order to extrapolate outside
        the bounds.

        To interpolate within the bounds, the temperature axis (i.e. 0) is
        generally best and smoother, whereas the logg axis (i.e. 1) works
        better to extrapolate outside::

            atmo.Fill_nan(axis=0, method='interp1d', bounds_error=False, fill_value=np.nan)
            atmo.Fill_nan(axis=1, method='spline', k=1, s=1)
        """
        if method not in ['interp1d', 'spline', 'pchip']:
            raise Exception("Wrong method input! Must be either 'interp1d', "
                            "'spline' or 'pchip'.")
        if inds_fill is None:
            inds_fill = np.isnan(self.data).nonzero()
        else:
            assert len(inds_fill) == self.ndim, "The shape must be (ndim, nfill)."

        vals_fill = []
        for inds_fill_ in zip(*inds_fill):
            inds = list(inds_fill_)
            inds[axis] = slice(None)
            inds = tuple(inds)
            y = self.data[inds]
            x = self.cols[axis]
            x_interp = x[inds_fill_[axis]]
            inds_bad = np.isnan(y)
            # Exclude the pixel being filled from the interpolation inputs.
            inds_bad[inds_fill_[axis]] = True
            inds_good = ~inds_bad
            if np.any(inds_good):
                if method == 'interp1d':
                    interpolator = scipy.interpolate.interp1d(
                        x[inds_good], y[inds_good], assume_sorted=True,
                        bounds_error=bounds_error, fill_value=fill_value)
                    y_interp = interpolator(x_interp)
                elif method == 'spline':
                    tck = scipy.interpolate.splrep(x[inds_good], y[inds_good],
                                                   k=k, s=s)
                    y_interp = scipy.interpolate.splev(x_interp, tck)
                elif method == 'pchip':
                    interpolator = scipy.interpolate.PchipInterpolator(
                        x[inds_good], y[inds_good], axis=0,
                        extrapolate=extrapolate)
                    y_interp = interpolator(x_interp)
                vals_fill.append(y_interp)
            else:
                vals_fill.append(np.nan)
        self.data[inds_fill] = vals_fill

    def Getaxispos(self, colname, x):
        """
        Return the index and weight of the linear interpolation of the point
        along a given axis.

        Parameters
        ----------
        colname : str
            Name of the axis to interpolate from.
        x : float, ndarray
            Value to interpolate at.

        Examples
        --------
        ::

            temp = Getaxispos('logtemp', np.log(3550.))
            logg = Getaxispos('logg', [4.11, 4.13, 4.02])
        """
        if isinstance(x, (list, tuple, np.ndarray)):
            return Utils.Series.Getaxispos_vector(self.cols[colname], x)
        else:
            return Utils.Series.Getaxispos_scalar(self.cols[colname], x)

    @property
    def IsFinite(self):
        return np.isfinite(self.data).astype(int)

    def Pprint(self, slices):
        """
        Print a 2-dimensional slice of the atmosphere grid for visualisation.

        Parameters
        ----------
        slices : list
            List of sliceable elements to extract the 2-dim slice to display.

        Examples
        --------
        ::

            # Display the equivalent of atmo[:,:,4]
            atmo.Pprint([None, None, 4])
            # Same as above but using fancier slice objects
            atmo.Pprint([slice(None), slice(None), 4])
            # Display the equivalent of atmo[3:9,3,:]
            atmo.Pprint([slice(3, 9), 3, None])
        """
        slices = list(slices)
        labels = []
        for i, s in enumerate(slices):
            if s is None:
                s = slice(None)
                slices[i] = s
            if isinstance(s, (int, slice)):
                tmp_label = self.cols[i][s]
                if self.colnames[i] == 'logtemp':
                    tmp_label = np.exp(tmp_label)
                if tmp_label.size > 1:
                    labels.append(tmp_label)
            else:
                raise Exception("The element {} is not a slice or integer or "
                                "cannot be converted to a sliceable entity."
                                .format(s))
        if len(labels) != 2:
            raise Exception("The slices should generate a 2-dimensional array. "
                            "Verify your input slices.")
        t = Table(data=self[tuple(slices)], names=labels[1].astype(str), copy=True)
        t.add_column(Column(data=labels[0]), index=0)
        t.pprint()

    @classmethod
    def ReadHDF5(cls, fln):
        try:
            import h5py
        except ImportError:
            raise Exception("h5py is needed for ReadHDF5")
        f = h5py.File(fln, 'r')
        # Note: dset[()] replaces the .value attribute removed in h5py 3.0.
        flux = np.ascontiguousarray(f['flux'][()], dtype=float)
        meta = {}
        for key_attrs, val_attrs in f.attrs.items():
            meta[key_attrs] = val_attrs
        colnames = meta.pop('colnames')
        name = meta.pop('name')
        description = meta.pop('description')
        cols = []
        grp = f['cols']
        for col in colnames:
            dset = grp[col]
            cols.append(Column(data=np.ascontiguousarray(dset[()]), name=col,
                               meta=dict(dset.attrs.items())))
        cols = TableColumns(cols)
        f.close()
        return cls(data=flux, name=name, description=description, meta=meta,
                   cols=cols)

    def SubGrid(self, *args):
        """
        Return a sub-grid of the atmosphere grid.

        Parameters
        ----------
        slices : slice
            Slice/sliceable object for each dimension of the atmosphere grid.

        Examples
        --------
        This would extract atmo[:,1:4,:]::

            new_atmo = atmo.SubGrid(slice(None), slice(1, 4), slice(None))
        """
        assert len(args) == self.ndim, ("The number of slices must match the "
                                        "dimension of the atmosphere grid.")
        slices = []
        for s in args:
            if isinstance(s, int):
                # Preserve the dimension when a single index is given.
                slices.append(slice(s, s + 1))
            else:
                slices.append(s)
        data = self.data[tuple(slices)]
        cols = []
        for c, s in zip(self.cols, slices):
            cols.append((c, np.atleast_1d(self.cols[c][s])))
        return self.__class__(name=self.name, data=data, unit=self.unit,
                              format=self.format, description=self.description,
                              meta=self.meta, cols=cols)

    def Trim(self, colname, low=None, high=None):
        """
        Return a copy of the atmosphere grid whose 'colname' axis has been
        trimmed at the 'low' and 'high' values: low <= colvalues <= high.

        Parameters
        ----------
        colname : str
            Name of the column to trim the grid on.
        low : float
            Lowest value to cut from. If None, will use the minimum value.
        high : float
            Highest value to cut from. If None, will use the maximum value.

        Examples
        --------
        The following would trim along the temperature axis and keep values
        between 4000 and 6000, inclusively::

            new_atmo = atmo.Trim('logtemp', low=np.log(4000.), high=np.log(6000.))
        """
        if colname not in self.colnames:
            raise Exception("The provided column name is not valid.")
        slices = [slice(None)] * self.ndim
        colind = self.colnames.index(colname)
        if low is None:
            low = self.cols[colname].min()
        if high is None:
            high = self.cols[colname].max()
        slices[colind] = np.logical_and(self.cols[colname] >= low,
                                        self.cols[colname] <= high)
        cols = []
        for c, s in zip(self.cols, slices):
            cols.append((c, np.atleast_1d(self.cols[c][s])))
        data = self.data[tuple(slices)].copy()
        meta = deepcopy(self.meta)
        return self.__class__(name=self.name, data=data, unit=self.unit,
                              format=self.format, description=self.description,
                              meta=meta, cols=cols)

    def WriteHDF5(self, fln, overwrite=False):
        try:
            import h5py
        except ImportError:
            raise Exception("h5py is needed for WriteHDF5")
        if os.path.exists(fln):
            if overwrite:
                os.remove(fln)
            else:
                raise IOError("File exists: {}".format(fln))
        f = h5py.File(fln, 'w')
        f.create_dataset(name='flux', data=self.data)
        f.attrs['colnames'] = list(self.cols.keys())
        f.attrs['name'] = self.name
        f.attrs['description'] = self.description
        for key_attrs, val_attrs in self.meta.items():
            f.attrs[key_attrs] = val_attrs
        grp = f.create_group('cols')
        for key, val in self.cols.items():
            dset = grp.create_dataset(name=key, data=val)
            if hasattr(val, 'meta'):
                for key_attrs, val_attrs in val.meta.items():
                    dset.attrs[key_attrs] = val_attrs
        f.close()
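# Usage sketch: a minimal end-to-end walk through the AtmoGrid API above,
# built on the synthetic grid from the class docstring. Grid values and file
# names are illustrative only; it assumes numpy, astropy and h5py are
# available.
#
# logtemp = np.log(np.arange(3000., 10001., 250.))
# logg = np.arange(2.0, 5.6, 0.5)
# mu = np.arange(0., 1.01, 0.02)
# logflux = np.random.normal(size=(logtemp.size, logg.size, mu.size))
# atmo = AtmoGrid(data=logflux, name='demo', description='synthetic demo grid',
#                 cols=[('logtemp', logtemp), ('logg', logg), ('mu', mu)])
#
# # String indexing resolves logarithmic axes: 'temp' maps to exp(logtemp).
# print(atmo['temp'][:3])
#
# # Trim to 4000-6000 K along the temperature axis, then take a sub-grid.
# trimmed = atmo.Trim('logtemp', low=np.log(4000.), high=np.log(6000.))
# sub = trimmed.SubGrid(slice(None), slice(1, 4), slice(None))
#
# # Round-trip through HDF5.
# sub.WriteHDF5('demo.h5', overwrite=True)
# back = AtmoGrid.ReadHDF5('demo.h5')
# assert back.shape == sub.shape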
# Module-level dependencies assumed here (as in the surrounding package):
# os, logging, pickle, pdb, subprocess, numpy as np, OrderedDict from
# collections, astropy.io.fits as fits, astropy.table (Table, TableColumns),
# the Cannon model module as tc (e.g. AnniesLasso), and the package-local
# helpers yanny, apload, getval, getrange and getfiles.
def fit(planfile, model_name=None, spectrum_filenames=None, threads=8,
        clobber=True, from_filename=False, fit_velocity=False,
        chunk_size=1000, output_suffix=None, **kwargs):
    """Fit a series of spectra."""
    p = yanny.yanny(planfile, np=True)
    apred = p['apred_vers'].strip("'")
    apstar = p['apstar_vers'].strip("'")
    aspcap = p['aspcap_vers'].strip("'")
    results = p['results_vers'].strip("'")
    threads = int(getval(p, 'ncpus', '16'))
    cannon = getval(p, 'cannon_vers', 'cannon_aspcap')
    if model_name is None:
        model_name = getval(p, 'model_name', 'apogee-dr14-giants')
    if output_suffix is None:
        output_suffix = getval(p, 'output_suffix', 'result')
    logg = getrange(getval(p, 'logg', '-1 3.9'))
    teff = getrange(getval(p, 'teff', '3500 5500'))
    mh = getrange(getval(p, 'mh', '-3. 1.'))
    alpha = getrange(getval(p, 'alpha', '-0.5 1.'))

    root = (os.environ['APOGEE_REDUX'] + '/' + apred + '/' + apstar + '/' +
            aspcap + '/' + results + '/' + cannon + '/')
    model = tc.load_model(os.path.join(root, "{}.model".format(model_name)),
                          threads=threads)
    assert model.is_trained

    label_names = model.vectorizer.label_names
    mean_labels = Table.read(os.path.join(root, "{}.initial".format(model_name)),
                             format='ascii')['col0']
    sig_labels = Table.read(os.path.join(root, "{}.initial".format(model_name)),
                            format='ascii')['col2']
    # mean_labels = np.loadtxt(os.path.join(root, "{}.initial".format(model_name)))
    logger = logging.getLogger("AnniesLasso")

    # Get the allStar file for the initial labels.
    apl = apload.ApLoad(apred=apred, apstar=apstar, aspcap=aspcap, results=results)
    allstar = apl.allStar()[1].data

    # Loop over the fields in the plan file.
    for field in p['ASPCAP']['field']:
        metadatas = []
        fluxes = []
        ivars = []
        output_filenames = []
        apogee_names = []
        failures = 0

        # Get the file names to fit.
        try:
            paths = getfiles(apred, apstar, aspcap, results, cannon, field)
        except Exception:
            return

        spectrum_filenames = []
        initial_labels = []
        apogee_ids = []
        for apogee_id, inpath, outpath in paths:
            # Only take stars within certain parameter ranges.
            print(apogee_id)
            # j = apselect.select(allstar, redid=apogee_id)[0]
            j = np.where(((allstar['REDUCTION_ID'] == apogee_id) |
                          (allstar['APOGEE_ID'] == apogee_id)) &
                         (allstar['COMMISS'] == 0))[0]
            if len(j) == 0:
                print('missing target', apogee_id)
            else:
                if len(j) > 1:
                    j = j[0]
                if ((allstar['FPARAM'][j, 1] >= logg[0]) & (allstar['FPARAM'][j, 1] <= logg[1]) &
                        (allstar['FPARAM'][j, 0] >= teff[0]) & (allstar['FPARAM'][j, 0] <= teff[1]) &
                        (allstar['FPARAM'][j, 3] >= mh[0]) & (allstar['FPARAM'][j, 3] <= mh[1]) &
                        (allstar['FPARAM'][j, 6] >= alpha[0]) & (allstar['FPARAM'][j, 6] <= alpha[1])):
                    spectrum_filenames.append(outpath)
                    apogee_names.append(apogee_id)
                    # Per-star initial labels (disabled in favour of the mean labels):
                    # labels = []
                    # for i, label in enumerate(label_names):
                    #     if allstar[label][j][0] > -9:
                    #         labels.append(allstar[label][j][0])
                    #     else:
                    #         labels.append(mean_labels[i])
                    # initial_labels.append(labels)

        if len(apogee_names) == 0:
            return
        # initial_labels = np.array(initial_labels)
        initial_labels = mean_labels  # MAGIC HACK

        delete_meta_keys = ("fjac", )  # To save space...
        summary_file = (root + field + '/cannonField-' + os.path.basename(field) +
                        '-' + output_suffix + '.fits')

        N = len(spectrum_filenames)
        for i, (apogee_id, filename) in enumerate(zip(apogee_names,
                                                      spectrum_filenames)):
            logger.info("At spectrum {0}/{1}: {2}".format(i + 1, N, filename))
            basename, _ = os.path.splitext(filename)
            output_filename = "-".join([basename, output_suffix]) + ".pkl"
            if os.path.exists(output_filename) and not clobber:
                logger.info("Output filename {} already exists and not clobbering."
                            .format(output_filename))
                continue
            try:
                with open(filename, "rb") as fp:
                    metadata, data = pickle.load(fp)
                metadatas.append(metadata)
                flux, ivar = data
                fluxes.append(flux)
                ivars.append(ivar)
                output_filenames.append(output_filename)
                apogee_ids.append(apogee_id)
            except Exception:
                logger.exception("Error occurred loading {}".format(filename))
                failures += 1
            else:
                # Fit (and save) in chunks to keep the memory footprint down.
                if len(output_filenames) >= chunk_size:
                    results, covs, metas = model.fit(
                        fluxes, ivars, initial_labels=initial_labels,
                        model_redshift=fit_velocity, full_output=True)
                    for result, cov, meta, output_filename in zip(
                            results, covs, metas, output_filenames):
                        for key in delete_meta_keys:
                            if key in meta:
                                del meta[key]
                        with open(output_filename, "wb") as fp:
                            pickle.dump((result, cov, meta), fp, 2)  # For legacy.
                        logger.info("Saved output to {}".format(output_filename))
                    del output_filenames[0:], fluxes[0:], ivars[0:]

        if len(output_filenames) > 0:
            results, covs, metas = model.fit(
                fluxes, ivars, initial_labels=initial_labels,
                model_redshift=fit_velocity, full_output=True)

            # Create an ordered dictionary of lists for all the data.
            data_dict = OrderedDict([("FILENAME", [])])
            data_dict['APOGEE_ID'] = []
            data_dict['LOCATION_ID'] = []
            data_dict['FIELD'] = []
            for label_name in label_names:
                data_dict[label_name] = []
            for label_name in label_names:
                data_dict["{}_RAWERR".format(label_name)] = []
            for label_name in label_names:
                data_dict["{}_ERR".format(label_name)] = []
            # data_dict["COV"] = []
            # meta_keys = metas[0].keys()
            meta_keys = ['chi_sq', 'r_chi_sq', 'model_flux']
            for key in meta_keys:
                data_dict[key] = []
            data_dict['flux'] = []
            data_dict['ivar'] = []

            # Loop over the spectra, write the individual files, and
            # accumulate the rows of the summary file.
            for result, cov, meta, output_filename, apogee_id, metadata, flux, ivar in zip(
                    results, covs, metas, output_filenames, apogee_ids,
                    metadatas, fluxes, ivars):
                if np.isfinite(result).all():
                    outlist = [os.path.basename(output_filename), apogee_id,
                               metadata['LOCATION_ID'], metadata['FIELD']] + result.tolist()
                    try:
                        rawerr = np.diag(cov)**0.5
                        outlist.extend(rawerr)
                    except Exception:
                        pdb.set_trace()
                    outlist.extend(np.max([rawerr, sig_labels], axis=0))
                    # outlist.append(cov.tolist())
                    for key in delete_meta_keys:
                        if key in meta:
                            del meta[key]
                    # outlist += [meta.get(k, v) for k, v in meta.items()]
                    outlist += [meta.get(k) for k in meta_keys]
                    outlist.append(flux)
                    outlist.append(ivar)
                    for key, value in zip(data_dict.keys(), outlist):
                        data_dict[key].append(value)

                    # Save to a pkl file?
                    # with open(output_filename, "wb") as fp:
                    #     pickle.dump((result, cov, meta), fp, 2)  # For legacy.
                    # logger.info("Saved output to {}".format(output_filename))

                    # Save to a FITS cannonStar file.
                    hdr = fits.Header()
                    hdr['HISTORY'] = ('IDLWRAP_VERSION: ' +
                                      subprocess.check_output('idlwrap_version')
                                      .decode().strip('\n'))
                    hdr['OBJ'] = apogee_id
                    hdr['LOCID'] = metadata['LOCATION_ID']
                    hdr['FIELD'] = metadata['FIELD']
                    hdr['CHI2'] = meta.get('r_chi_sq')
                    for i, label_name in enumerate(label_names):
                        hdr[label_name] = result[i]
                    hdulist = fits.HDUList(fits.PrimaryHDU(header=hdr))
                    hdr = fits.Header()
                    hdr['OBSERVER'] = 'Edwin Hubble'
                    hdr['CRVAL1'] = 4.179e0
                    hdr['CDELT1'] = 6.e-6
                    hdr['CRPIX1'] = 1
                    hdr['CTYPE1'] = 'LOG-LINEAR'
                    hdr['DC-FLAG'] = 1
                    hdulist.append(fits.ImageHDU(flux, header=hdr))
                    hdulist.append(fits.ImageHDU(1. / np.sqrt(ivar), header=hdr))
                    hdulist.append(fits.ImageHDU(meta.get('model_flux'), header=hdr))
                    hdulist.writeto(output_filename.replace('-result', '')
                                    .replace('.pkl', '.fits'), overwrite=True)

            del output_filenames[0:], fluxes[0:], ivars[0:]

            logger.info("Number of failures: {}".format(failures))
            logger.info("Number of successes: {}".format(N - failures))
            table = Table(TableColumns(data_dict))
            table.write(summary_file.replace('-result', ''), overwrite=clobber)
            logger.info("Written to {}".format(summary_file))
    return None