def _nddata_to_glue_data(ndd, data_label):
    if ndd.data.ndim != 2:
        raise ValueError(f'Imviz cannot load this NDData with ndim={ndd.data.ndim}')

    for attrib in ['data', 'mask', 'uncertainty']:
        arr = getattr(ndd, attrib)
        if arr is None:
            continue
        comp_label = attrib.upper()
        cur_label = f'{data_label}[{comp_label}]'
        cur_data = Data(label=cur_label)
        cur_data.meta.update(ndd.meta)
        if ndd.wcs is not None:
            cur_data.coords = ndd.wcs
        raw_arr = arr
        if attrib == 'data':
            bunit = ndd.unit or ''
        elif attrib == 'uncertainty':
            raw_arr = arr.array
            bunit = arr.unit or ''
        else:
            bunit = ''
        component = Component.autotyped(raw_arr, units=bunit)
        cur_data.add_component(component=component, label=comp_label)
        yield cur_data, cur_label
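
# Usage sketch (not from the original source): drive the generator above with
# a synthetic astropy NDData; assumes Data/Component are already imported from
# glue.core as in the surrounding snippets.
import numpy as np
from astropy.nddata import NDData

ndd = NDData(np.zeros((8, 8)), unit='Jy', meta={'TELESCOP': 'demo'})
for cur_data, cur_label in _nddata_to_glue_data(ndd, 'my_image'):
    print(cur_label)  # e.g. 'my_image[DATA]'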
def test_image():
    data = Data(label="Test Image")
    comp_a = Component(np.ones((25, 25)))
    data.add_component(comp_a, 'test_1')
    comp_b = Component(np.zeros((25, 25)))
    data.add_component(comp_b, 'test_2')
    return data
def astropy_tabular_data(*args, **kwargs):
    """
    Build a data set from a table. We restrict ourselves to tables
    with 1D columns.

    All arguments are passed to astropy.table.Table.read(...).
    """
    result = Data()

    table = astropy_table_read(*args, **kwargs)

    result.meta = table.meta

    # Loop through columns and make component list
    for column_name in table.columns:
        c = table[column_name]
        u = c.unit if hasattr(c, 'unit') else c.units

        if table.masked:
            # fill array for now
            try:
                c = c.filled(fill_value=np.nan)
            except (ValueError, TypeError):  # assigning nan to integer dtype
                c = c.filled(fill_value=-1)

        nc = Component.autotyped(c, units=u)
        result.add_component(nc, column_name)

    return result
def merge(self, *data, **kwargs):
    """
    Merge two or more datasets into a single dataset.

    This has the following effects:

    All components from all datasets are added to the first argument.
    All datasets except the first argument are removed from the collection.
    Any component name conflicts are disambiguated.
    The pixel and world components apart from the first argument are discarded.

    :note: All arguments must have the same shape

    :param data: One or more :class:`~glue.core.data.Data` instances.

    :returns: self
    """
    if len(data) < 2:
        raise ValueError("merge requires 2 or more arguments")
    shp = data[0].shape
    for d in data:
        if d.shape != shp:
            raise ValueError("All arguments must have the same shape")

    label = kwargs.get('label', data[0].label)

    master = Data(label=label)
    self.append(master)

    master.coords = data[0].coords
    for i, d in enumerate(data):
        if isinstance(d.coords, WCSCoordinates):
            master.coords = d.coords
            break

    # Find ambiguous components (ones which have labels in more than one
    # dataset)
    from collections import Counter
    clabel_count = Counter([c.label for d in data for c in d.visible_components])

    for d in data:
        for c in d.components:
            if c in master.components:  # already present (via a link)
                continue
            lbl = c.label
            if clabel_count[lbl] > 1:
                lbl = lbl + " [{0}]".format(d.label)
            c._label = lbl
            c.parent = master
            master.add_component(d.get_component(c), c)
        self.remove(d)

    return self
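
# Usage sketch (not from the original source): exercise merge() on a
# DataCollection; assumes glue-core's public API, with array components
# passed directly to the Data constructor. Labels here are illustrative.
import numpy as np
from glue.core import Data, DataCollection

dc = DataCollection([Data(label='obs1', flux=np.ones((4, 4))),
                     Data(label='obs2', weight=np.zeros((4, 4)))])
dc.merge(dc[0], dc[1], label='combined')  # returns the collection itself
print([d.label for d in dc])  # ['combined']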
def test_histogram_data():
    data = Data(label="Test Data")
    comp_a = Component(np.random.uniform(size=500))
    comp_b = Component(np.random.normal(size=500))
    data.add_component(comp_a, 'uniform')
    data.add_component(comp_b, 'normal')
    return data
def panda_process(indf):
    """
    Build a data set from a table using pandas. This attempts to respect
    categorical data input by letting pandas.read_csv infer the type.
    """
    result = Data()
    for name, column in indf.iteritems():
        if (column.dtype == np.object) | (column.dtype == np.bool):
            # try to salvage numerical data
            coerced = column.convert_objects(convert_numeric=True)
            if (coerced.dtype != column.dtype) and coerced.isnull().mean() < 0.4:
                c = Component(coerced.values)
            else:
                # pandas has a 'special' nan implementation and this doesn't
                # play well with np.unique
                c = CategoricalComponent(column.fillna(''))
        else:
            c = Component(column.values)

        # convert header to string - in some cases if the first row contains
        # numbers, these are cast to numerical types, so we want to change
        # that here.
        if not isinstance(name, six.string_types):
            name = str(name)

        # strip off leading #
        name = name.strip()
        if name.startswith('#'):
            name = name[1:].strip()

        result.add_component(c, name)

    return result
def npz_reader(filename, format='auto', auto_merge=False, **kwargs):
    """
    Read in a Numpy structured array saved to a .npy or .npz file.

    Parameters
    ----------
    filename : str
        The pathname to the Numpy save file.
    """
    import numpy as np

    npy_data = np.load(filename)

    groups = []
    for groupname in sorted(npy_data.files):
        d = Data(label=groupname)
        arr = npy_data[groupname]

        # dtype.names is None for unstructured arrays (every dtype has the
        # attribute, so a hasattr check would never trigger)
        if arr.dtype.names is None:
            raise ValueError("Numpy save file loading currently only supports structured"
                             " arrays, e.g., with specified names.")

        for name in arr.dtype.names:
            comp = Component.autotyped(arr[name])
            d.add_component(comp, label=name)
        groups.append(d)

    return groups
def npy_npz_reader(filename, format='auto', auto_merge=False, **kwargs):
    """
    Read in a Numpy structured array saved to a .npy or .npz file.

    Parameters
    ----------
    filename : str
        The pathname to the Numpy save file.
    """
    import numpy as np

    data = np.load(filename)

    if isinstance(data, np.ndarray):
        data = {None: data}

    groups = []
    for groupname in sorted(data):
        d = Data(label=groupname)
        arr = data[groupname]
        if arr.dtype.names is None:
            comp = Component.autotyped(arr)
            d.add_component(comp, label='array')
        else:
            for name in arr.dtype.names:
                comp = Component.autotyped(arr[name])
                d.add_component(comp, label=name)
        groups.append(d)

    return groups
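
# Usage sketch (not from the original source): round-trip a structured array
# through npy_npz_reader; 'catalog.npz' is a throwaway path for the demo.
import numpy as np

stars = np.zeros(5, dtype=[('ra', 'f8'), ('dec', 'f8')])
np.savez('catalog.npz', stars=stars)
for d in npy_npz_reader('catalog.npz'):
    print(d.label, [str(c) for c in d.components])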
def _load_data(rec, context):
    label = rec['label']
    result = Data(label=label)
    result.coords = context.object(rec['coords'])

    # we manually rebuild pixel/world components, so
    # we override this function. This is pretty ugly
    result._create_pixel_and_world_components = lambda: None

    comps = [list(map(context.object, [cid, comp]))
             for cid, comp in rec['components']]
    comps = sorted(comps,
                   key=lambda x: isinstance(x[1], (DerivedComponent,
                                                   CoordinateComponent)))
    for cid, comp in comps:
        if isinstance(comp, CoordinateComponent):
            comp._data = result
        result.add_component(comp, cid)

    assert result._world_component_ids == []

    coord = [c for c in comps if isinstance(c[1], CoordinateComponent)]
    coord = [x[0] for x in sorted(coord, key=lambda x: x[1])]

    assert len(coord) == result.ndim * 2
    result._world_component_ids = coord[:len(coord) // 2]
    result._pixel_component_ids = coord[len(coord) // 2:]

    for s in rec['subsets']:
        result.add_subset(context.object(s))

    return result
def merge(self, *data, **kwargs):
    """
    Merge two or more datasets into a single dataset.

    This has the following effects:

    All components from all datasets are added to the first argument.
    All datasets except the first argument are removed from the collection.
    Any component name conflicts are disambiguated.
    The pixel and world components apart from the first argument are discarded.

    :note: All arguments must have the same shape

    :param data: One or more :class:`~glue.core.data.Data` instances.

    :returns: self
    """
    if len(data) < 2:
        raise ValueError("merge requires 2 or more arguments")
    shp = data[0].shape
    for d in data:
        if d.shape != shp:
            raise ValueError("All arguments must have the same shape")

    label = kwargs.get('label', data[0].label)

    master = Data(label=label)
    self.append(master)

    master.coords = data[0].coords

    for d in data:
        skip = d.pixel_component_ids + d.world_component_ids
        for c in d.components:
            if c in skip:
                continue
            if c in master.components:  # already present (via a link)
                continue
            taken = [_.label for _ in master.components]
            lbl = c.label

            # Special-case 'PRIMARY', rename to data label
            if lbl == 'PRIMARY':
                lbl = d.label

            # First-pass disambiguation, try component_data
            if lbl in taken:
                lbl = '%s_%s' % (lbl, d.label)

            lbl = disambiguate(lbl, taken)
            c._label = lbl
            master.add_component(d.get_component(c), c)
        self.remove(d)

    return self
def load_stacked_sequence(self, raster_data):
    for window, window_data in raster_data.items():
        w_data = Data(label=f"{window.replace(' ', '_')}")
        w_data.coords = window_data.wcs
        w_data.add_component(Component(window_data.data),
                             f"{window.replace(' ', '_')}")
        w_data.style = VisualAttributes(color='#7A617C')
        self.datasets.append(w_data)
def _ndarray_to_glue_data(arr, data_label):
    if arr.ndim != 2:
        raise ValueError(f'Imviz cannot load this array with ndim={arr.ndim}')

    data = Data(label=data_label)
    component = Component.autotyped(arr)
    data.add_component(component=component, label='DATA')
    yield data, data_label
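
# Usage sketch (not from the original source): the helper above is a
# generator, so it is consumed with a loop even for a single 2D array.
import numpy as np

for data, label in _ndarray_to_glue_data(np.arange(16).reshape(4, 4), 'demo'):
    print(label, data.shape)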
def hdf5_reader(filename, format='auto', auto_merge=False, **kwargs):
    """
    Read in all datasets from an HDF5 file

    Parameters
    ----------
    filename : str
        The pathname to the HDF5 file.
    """
    import h5py
    from astropy.table import Table

    # Open file
    file_handle = h5py.File(filename, 'r')

    # Read in all datasets
    datasets = extract_hdf5_datasets(file_handle)

    label_base = os.path.basename(filename).rpartition('.')[0]

    if not label_base:
        label_base = os.path.basename(filename)

    data_by_shape = {}

    groups = OrderedDict()
    for key in datasets:
        label = '{0}[{1}]'.format(label_base, key)
        if datasets[key].dtype.kind in ('f', 'i'):
            if auto_merge and datasets[key].value.shape in data_by_shape:
                data = data_by_shape[datasets[key].value.shape]
            else:
                data = Data(label=label)
                data_by_shape[datasets[key].value.shape] = data
                groups[label] = data
            data.add_component(component=datasets[key].value, label=key)
        else:
            table = Table.read(datasets[key], format='hdf5')
            data = Data(label=label)
            groups[label] = data
            for column_name in table.columns:
                column = table[column_name]
                if column.ndim == 1:
                    component = Component(column, units=column.unit)
                    data.add_component(component=component,
                                       label=column_name)
                else:
                    warnings.warn("HDF5: Ignoring vector column {0}".format(column_name))

    # Close HDF5 file
    file_handle.close()

    return [groups[idx] for idx in groups]
def read_header(header):
    out = []
    for stream in header.stream_names:
        result = Data(label="{stream}_{uid}".format(stream=stream,
                                                    uid=header.start['uid']))
        tbl = header.table(stream, fill=True)
        for col in tbl.columns:
            result.add_component(tbl[col], str(col))
        out.append(result)
    return out
def merge(self, *data, **kwargs):
    """
    Merge two or more datasets into a single dataset.

    This has the following effects:

    All components from all datasets are added to the first argument.
    All datasets except the first argument are removed from the collection.
    Any component name conflicts are disambiguated.
    The pixel and world components apart from the first argument are discarded.

    :note: All arguments must have the same shape

    :param data: One or more :class:`~glue.core.data.Data` instances.

    :returns: self
    """
    if len(data) < 2:
        raise ValueError("merge requires 2 or more arguments")
    shp = data[0].shape
    for d in data:
        if d.shape != shp:
            raise ValueError("All arguments must have the same shape")

    label = kwargs.get('label', data[0].label)

    master = Data(label=label)
    self.append(master)

    for d in data:
        skip = d.pixel_component_ids + d.world_component_ids
        for c in d.components:
            if c in skip:
                continue
            if c in master.components:  # already present (via a link)
                continue
            taken = [_.label for _ in master.components]
            lbl = c.label

            # Special-case 'PRIMARY', rename to data label
            if lbl == 'PRIMARY':
                lbl = d.label

            # First-pass disambiguation, try component_data
            if lbl in taken:
                lbl = '%s_%s' % (lbl, d.label)

            lbl = disambiguate(lbl, taken)
            c._label = lbl
            master.add_component(d.get_component(c), c)
        self.remove(d)

    return self
def load_sunpy_map(self, sunpy_map):
    sunpy_map_loaded = sunpy.map.Map(sunpy_map)
    label = 'sunpy-map-' + sunpy_map_loaded.name
    data = Data(label=label)

    # preferred way, preserves more info in some cases
    data.coords = sunpy_map_loaded.wcs
    data.meta = sunpy_map_loaded.meta
    data.add_component(Component(sunpy_map_loaded.data),
                       sunpy_map_loaded.name)
    # use the loaded map's colormap, so this also works when sunpy_map is a
    # file path rather than an existing Map object
    data.style = VisualAttributes(color='#FDB813',
                                  preferred_cmap=sunpy_map_loaded.cmap)

    self.datasets.append(data)
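
# Usage sketch (not from the original source): feed the loader sunpy's
# bundled sample AIA image; assumes sunpy is installed (the sample file is
# downloaded on first access) and `loader` is an instance of the class the
# method above belongs to.
import sunpy.data.sample

loader.load_sunpy_map(sunpy.data.sample.AIA_171_IMAGE)
print(loader.datasets[-1].label)  # e.g. 'sunpy-map-AIA 171.0 ...'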
def load_sji(self, sji):
    with fits.open(sji) as hdul:
        hdul.verify("fix")
        label = hdul[0].header['TDESC1'] + hdul[0].header['OBSID']
        data = Data(label=label)
        data.coords = WCSCoordinates(hdul[0].header)
        data.meta = hdul[0].header
        preferred_cmap_name = 'IRIS ' + hdul[0].header['TDESC1'].replace('_', ' ')
        data.style = VisualAttributes(preferred_cmap=preferred_cmap_name)
        data.add_component(Component(hdul[0].data), label)
        self.datasets.append(data)
def make_test_data():
    data = Data(label="Test Cat Data 1")

    comp_x1 = Component(np.array([4, 5, 6, 3]))
    comp_y1 = Component(np.array([1, 2, 3, 2]))
    comp_z1 = Component(np.array([2, 3, 4, 1]))

    data.add_component(comp_x1, 'x_gal')
    data.add_component(comp_y1, 'y_gal')
    data.add_component(comp_z1, 'z_gal')

    return data
def load_sequence(self, raster_data):
    for window, window_data in raster_data.items():
        for i, scan_data in enumerate(window_data):
            w_data = Data(label=f"{window.replace(' ', '_')}-{scan_data.meta['OBSID']}-scan-{i}")
            w_data.coords = scan_data.wcs
            w_data.add_component(Component(scan_data.data),
                                 f"{window.replace(' ', '_')}-scan-{i}")
            w_data.meta = scan_data.meta
            w_data.style = VisualAttributes(color='#5A4FCF')
            self.datasets.append(w_data)
def test_data():
    data = Data(label="Test Data 1")
    data2 = Data(label="Test Data 2")

    comp_a = Component(np.array([1, 2, 3]))
    comp_b = Component(np.array([1, 2, 3]))
    comp_c = Component(np.array([2, 4, 6]))
    comp_d = Component(np.array([1, 3, 5]))

    data.add_component(comp_a, 'a')
    data.add_component(comp_b, 'b')
    data2.add_component(comp_c, 'c')
    data2.add_component(comp_d, 'd')

    return data, data2
def _load_data(rec, context):
    label = rec['label']
    result = Data(label=label)
    if 'coords' in rec:
        result.coords = context.object(rec['coords'])

    # we manually rebuild pixel/world components, so
    # we override this function. This is pretty ugly
    result._create_pixel_and_world_components = lambda ndim: None

    comps = [list(map(context.object, [cid, comp]))
             for cid, comp in rec['components']]

    for icomp, (cid, comp) in enumerate(comps):
        if isinstance(comp, CoordinateComponent):
            comp._data = result

            # For backward compatibility, we need to check for cases where
            # the component ID for the pixel components was not a PixelComponentID
            # and upgrade it to one. This can be removed once we no longer
            # support pre-v0.8 session files.
            if not comp.world and not isinstance(cid, PixelComponentID):
                cid = PixelComponentID(comp.axis, cid.label, parent=cid.parent)
                comps[icomp] = (cid, comp)

        result.add_component(comp, cid)

    assert result._world_component_ids == []

    coord = [c for c in comps if isinstance(c[1], CoordinateComponent)]
    coord = [x[0] for x in sorted(coord, key=lambda x: x[1])]

    if getattr(result, 'coords') is not None:
        assert len(coord) == result.ndim * 2
        result._world_component_ids = coord[:len(coord) // 2]
        result._pixel_component_ids = coord[len(coord) // 2:]
    else:
        assert len(coord) == result.ndim
        result._pixel_component_ids = coord

    # We can now re-generate the coordinate links
    result._set_up_coordinate_component_links(result.ndim)

    for s in rec['subsets']:
        result.add_subset(context.object(s))

    return result
def test_categorical_data():
    data = Data(label="Test Cat Data 1")
    data2 = Data(label="Test Cat Data 2")

    comp_x1 = CategoricalComponent(np.array(['a', 'a', 'b']))
    comp_y1 = Component(np.array([1, 2, 3]))
    comp_x2 = CategoricalComponent(np.array(['c', 'a', 'b']))
    comp_y2 = Component(np.array([1, 3, 5]))

    data.add_component(comp_x1, 'x1')
    data.add_component(comp_y1, 'y1')
    data2.add_component(comp_x2, 'x2')
    data2.add_component(comp_y2, 'y2')

    return data, data2
def test_high_cardinality_timing(self):
    card = 50000
    data = Data()
    card_data = [str(num) for num in range(card)]
    data.add_component(Component(np.arange(card * 5)), 'y')
    data.add_component(CategoricalComponent(np.repeat([card_data], 5)), 'xcat')
    self.add_data(data)

    comp = data.find_component_id('xcat')
    timer_func = partial(self.client._set_xydata, 'x', comp)
    timer = timeit(timer_func, number=1)
    assert timer < 3  # this is set for Travis speed
def hdf5_reader(filename, auto_merge=False, memmap=True, **kwargs):
    """
    Read in all datasets from an HDF5 file

    Parameters
    ----------
    filename : str
        The filename of the HDF5 file
    memmap : bool, optional
        Whether to use memory mapping
    """
    from astropy.table import Table

    # Read in all datasets
    datasets = extract_hdf5_datasets(filename, memmap=memmap)

    label_base = os.path.basename(filename).rpartition('.')[0]

    if not label_base:
        label_base = os.path.basename(filename)

    data_by_shape = {}

    groups = OrderedDict()
    for key in datasets:
        label = '{0}[{1}]'.format(label_base, key)
        array = datasets[key]
        if isinstance(array, Table):
            data = Data(label=label)
            groups[label] = data
            for column_name in array.columns:
                column = array[column_name]
                if column.ndim == 1:
                    component = Component.autotyped(column, units=column.unit)
                    data.add_component(component=component,
                                       label=column_name)
                else:
                    warnings.warn("HDF5: Ignoring vector column {0}".format(column_name))
        else:
            if auto_merge and array.shape in data_by_shape:
                data = data_by_shape[array.shape]
            else:
                data = Data(label=label)
                data_by_shape[array.shape] = data
                groups[label] = data
            data.add_component(component=array, label=key[1:])

    return [groups[idx] for idx in groups]
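
# Usage sketch (not from the original source): write a tiny HDF5 file with
# h5py and read it back; relies on the extract_hdf5_datasets helper the
# reader above imports from elsewhere in the package.
import h5py
import numpy as np

with h5py.File('demo.h5', 'w') as f:
    f['cube'] = np.random.random((3, 4))

for d in hdf5_reader('demo.h5'):
    print(d.label)  # e.g. 'demo[/cube]'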
def _parse_iris_raster(data, label):
    """
    Parse IRIS Level 2 raster files so that they can be loaded by glue.
    """
    w_dataset = []
    for window, window_data in data.items():
        for i, scan_data in enumerate(window_data):
            w_data = Data(label=f"{window.replace(' ', '_')}-{scan_data.meta['OBSID']}-scan-{i}")
            w_data.coords = WCSCoordinates(scan_data.wcs.to_header())
            w_data.add_component(Component(scan_data.data),
                                 f"{window.replace(' ', '_')}-{scan_data.meta['OBSID']}-scan-{i}")
            w_data.meta = scan_data.meta
            w_data.style = VisualAttributes(color='#5A4FCF')
            w_dataset.append(w_data)

    return w_dataset
def newView(self, type="scatter", components=[], title="New View", **kwargs): only_subsets = kwargs.get('only_subsets', False) only_view = kwargs.get('only_view', False) if (self.parent is not None): kwargs.setdefault('modal', self.parent.modal) if (self.debug is not None): kwargs.setdefault('debug', self.debug) gp = None mode = "tab-after" if (len(self.active_views.values()) == 0): mode = "split-bottom" kwargs.setdefault('mode', mode) if only_view is False: gp = self.factory.createGluePlot(type, self.data, components, title, **kwargs) else: data = Data(label=self.data.label) for c in components: data.add_component(self.data[c, self.selection], label=c) data.get_component(c).color = self.data.get_component(c).color if (data.size > 0): gp = self.factory.createGluePlot(type, data, components, title, **kwargs) if gp is not None: if only_view is False: gp.setParent(self) key = id(gp.window) self.active_views[key] = gp self.views[key] = { 'type': type, 'components': components, 'title': title, 'kwargs': kwargs } if isinstance(gp.window, Floatview): gp.window.observe( lambda changes: GlueManager.removeViewIfDisposed( self, gp.window), 'uid') self.parent.updateHistory() return gp
def _hdu2data(hdu, data_label, hdulist, include_wcs=True):
    if 'BUNIT' in hdu.header and _validate_bunit(hdu.header['BUNIT'], raise_error=False):
        bunit = hdu.header['BUNIT']
    else:
        bunit = ''

    comp_label = f'{hdu.name.upper()},{hdu.ver}'
    new_data_label = f'{data_label}[{comp_label}]'

    data = Data(label=new_data_label)
    if include_wcs:
        data.coords = WCS(hdu.header, hdulist)
    component = Component.autotyped(hdu.data, units=bunit)
    data.add_component(component=component, label=comp_label)

    return data, new_data_label
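
# Usage sketch (not from the original source): run a synthetic ImageHDU
# through _hdu2data with WCS disabled, so no real header or _validate_bunit
# input is needed.
import numpy as np
from astropy.io import fits

hdu = fits.ImageHDU(np.zeros((4, 4)), name='SCI')
data, label = _hdu2data(hdu, 'demo', None, include_wcs=False)
print(label)  # 'demo[SCI,1]'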
def casalike_cube(filename, **kwargs):
    """
    This provides special support for 4D CASA FITS-like cubes,
    which have 2 spatial axes, a spectral axis, and a stokes axis,
    in that order.

    Each stokes cube is split out as a separate component.
    """
    from astropy.io import fits

    result = Data()
    with fits.open(filename, **kwargs) as hdulist:
        array = hdulist[0].data
        header = hdulist[0].header
    result.coords = coordinates_from_header(header)
    for i in range(array.shape[0]):
        result.add_component(array[[i]], label='STOKES %i' % i)
    return result
def _load_data(rec, context):
    label = rec['label']
    result = Data(label=label)
    result.coords = context.object(rec['coords'])

    # we manually rebuild pixel/world components, so
    # we override this function. This is pretty ugly
    result._create_pixel_and_world_components = lambda ndim: None

    comps = [list(map(context.object, [cid, comp]))
             for cid, comp in rec['components']]

    for icomp, (cid, comp) in enumerate(comps):
        if isinstance(comp, CoordinateComponent):
            comp._data = result

            # For backward compatibility, we need to check for cases where
            # the component ID for the pixel components was not a PixelComponentID
            # and upgrade it to one. This can be removed once we no longer
            # support pre-v0.8 session files.
            if not comp.world and not isinstance(cid, PixelComponentID):
                cid = PixelComponentID(comp.axis, cid.label, parent=cid.parent)
                comps[icomp] = (cid, comp)

        result.add_component(comp, cid)

    assert result._world_component_ids == []

    coord = [c for c in comps if isinstance(c[1], CoordinateComponent)]
    coord = [x[0] for x in sorted(coord, key=lambda x: x[1])]

    assert len(coord) == result.ndim * 2
    result._world_component_ids = coord[:len(coord) // 2]
    result._pixel_component_ids = coord[len(coord) // 2:]

    # We can now re-generate the coordinate links
    result._set_up_coordinate_component_links(result.ndim)

    for s in rec['subsets']:
        result.add_subset(context.object(s))

    return result
def _hdu2data(hdu, data_label, hdulist, include_wcs=True):
    if 'BUNIT' in hdu.header:
        bunit = _validate_bunit(hdu.header['BUNIT'], raise_error=False)
    else:
        bunit = ''

    comp_label = f'{hdu.name.upper()},{hdu.ver}'
    new_data_label = f'{data_label}[{comp_label}]'

    data = Data(label=new_data_label)
    if hdulist is not None and hdu.name != 'PRIMARY' and 'PRIMARY' in hdulist:
        data.meta.update(dict(hdulist['PRIMARY'].header))
    data.meta.update(dict(hdu.header))
    if include_wcs:
        data.coords = WCS(hdu.header, hdulist)
    component = Component.autotyped(hdu.data, units=bunit)
    data.add_component(component=component, label=comp_label)

    return data, new_data_label
def pandas_to_glue(df, label='data', name_map=default_name_map):
    """Convert a dataframe to glue.core.data.Data.

    Glue categorical variables require hashing, so an array of unhashable
    components is stored in ComponentID._unhashable.
    Column names are overridden with the dictionary values in name_map.
    """
    data = Data(label=label)
    for c in df.columns:
        c_name = map_column_names(c)
        try:
            data.add_component(df[c], c_name)
        except TypeError:
            # pd.factorize error with int list input to CategoricalComponent
            r = ['%09d' % i for i in range(len(df[c]))]
            cc = CategoricalComponent(r)
            c_id = ComponentID(c_name)
            c_id._unhashable = np.array(df[c])
            data.add_component(cc, c_id)
    return data
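
# Usage sketch (not from the original source): convert a small DataFrame;
# assumes pandas is installed and that default_name_map/map_column_names are
# defined alongside the function above.
import pandas as pd

df = pd.DataFrame({'flux': [1.0, 2.0, 3.0], 'band': ['g', 'r', 'i']})
catalog = pandas_to_glue(df, label='catalog')
print([str(c) for c in catalog.main_components])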
def _jwst2data(file_obj, ext, data_label):
    comp_label = ext.upper()
    new_data_label = f'{data_label}[{comp_label}]'
    data = Data(label=new_data_label)
    unit_attr = f'bunit_{ext}'

    try:
        # This is very specific to JWST pipeline image output.
        with AsdfInFits.open(file_obj) as af:
            dm = af.tree
            dm_meta = af.tree["meta"]

            if (unit_attr in dm_meta and
                    _validate_bunit(dm_meta[unit_attr], raise_error=False)):
                bunit = dm_meta[unit_attr]
            else:
                bunit = ''

            # This is an instance of gwcs.WCS, not astropy.wcs.WCS
            if 'wcs' in dm_meta:
                data.coords = dm_meta['wcs']

            imdata = dm[ext]
            component = Component.autotyped(imdata, units=bunit)

            # Might have bad GWCS. If so, we exclude it.
            try:
                data.add_component(component=component, label=comp_label)
            except Exception:  # pragma: no cover
                data.coords = None
                data.add_component(component=component, label=comp_label)

    # TODO: Do not need this when jwst.datamodels finally becomes its own package.
    # This might happen for grism images; fall back to the FITS loader without WCS.
    except Exception:
        if ext == 'data':
            ext = 'sci'
        hdu = file_obj[ext]
        return _hdu2data(hdu, data_label, file_obj, include_wcs=False)

    return data, new_data_label
def test_indexed(self):

    # Here we slice two of the dimensions and then compare the results to a
    # manually sliced dataset.

    derived = IndexedData(self.data, (None, 2, None, 4, None))

    manual = Data()
    manual.add_component(self.data[self.x_id][:, 2, :, 4, :], label=self.x_id)
    manual.add_component(self.data[self.y_id][:, 2, :, 4, :], label=self.y_id)

    assert derived.label == 'Test data[:,2,:,4,:]'
    assert derived.shape == manual.shape
    assert [str(c) for c in derived.main_components] == [str(c) for c in manual.main_components]
    assert derived.get_kind(self.x_id) == manual.get_kind(self.x_id)

    for view in [None, (1, slice(None), slice(1, 4))]:

        assert_equal(derived.get_data(self.x_id, view=view),
                     manual.get_data(self.x_id, view=view))

        assert_equal(derived.get_mask(self.subset_state, view=view),
                     manual.get_mask(self.subset_state, view=view))

    bounds = [2, (-5, 5, 10), (-3, 3, 10)]
    assert_equal(derived.compute_fixed_resolution_buffer(bounds=bounds, target_cid=self.x_id),
                 manual.compute_fixed_resolution_buffer(bounds=bounds, target_cid=self.x_id))

    assert_equal(derived.compute_statistic('mean', self.x_id),
                 manual.compute_statistic('mean', self.x_id))

    assert_equal(derived.compute_statistic('mean', self.x_id, axis=2),
                 manual.compute_statistic('mean', self.x_id, axis=2))

    assert_equal(derived.compute_statistic('mean', self.x_id, subset_state=self.subset_state),
                 manual.compute_statistic('mean', self.x_id, subset_state=self.subset_state))

    assert_equal(derived.compute_histogram([self.x_id], range=[(0, 1000)], bins=[30]),
                 manual.compute_histogram([self.x_id], range=[(0, 1000)], bins=[30]))

    assert_equal(derived.compute_histogram([self.x_id], range=[(0, 1000)], bins=[30],
                                           subset_state=self.subset_state),
                 manual.compute_histogram([self.x_id], range=[(0, 1000)], bins=[30],
                                          subset_state=self.subset_state))
def test_limits_inf(self):
    d = Data()
    x = Component(np.array([[1, 2], [np.inf, 4]]))
    y = Component(np.array([[2, 4], [-np.inf, 8]]))
    xid = d.add_component(x, 'x')
    yid = d.add_component(y, 'y')
    self.collect.append(d)
    self.client.add_layer(d)
    self.client.xatt = xid
    self.client.yatt = yid
    assert self.client._visible_limits(0) == (1, 4)
    assert self.client._visible_limits(1) == (2, 8)
def npy_reader(filename, format='auto', auto_merge=False, **kwargs):
    """
    Read in a Numpy structured array saved to a .npy or .npz file.

    Parameters
    ----------
    filename : str
        The pathname to the Numpy save file.
    """
    import numpy as np

    npy_data = np.load(filename)

    # dtype.names is None for unstructured arrays (every dtype has the
    # attribute, so a hasattr check would never trigger)
    if npy_data.dtype.names is None:
        raise ValueError("Numpy save file loading currently only supports structured"
                         " arrays, e.g., with specified names.")

    d = Data()
    for name in npy_data.dtype.names:
        comp = Component(npy_data[name])
        d.add_component(comp, label=name)

    return d
def gridded_data(filename, format='auto', **kwargs):
    result = Data()

    # Try and automatically find the format if not specified
    if format == 'auto':
        format = file_format(filename)

    # Read in the data
    if is_fits(filename):
        from astropy.io import fits
        arrays = extract_data_fits(filename, **kwargs)
        header = fits.getheader(filename)
        result.coords = coordinates_from_header(header)
    elif is_hdf5(filename):
        arrays = extract_data_hdf5(filename, **kwargs)
    else:
        raise Exception("Unknown format: %s" % format)

    for component_name in arrays:
        comp = Component.autotyped(arrays[component_name])
        result.add_component(comp, component_name)
    return result
def img_data(file_name):
    """Load common image files into a Glue data object"""
    result = Data()

    data = img_loader(file_name)
    data = np.flipud(data)
    shp = data.shape

    comps = []
    labels = []

    # split 3 color images into each color plane
    if len(shp) == 3 and shp[2] in [3, 4]:
        comps.extend([data[:, :, 0], data[:, :, 1], data[:, :, 2]])
        labels.extend(['red', 'green', 'blue'])
        if shp[2] == 4:
            comps.append(data[:, :, 3])
            labels.append('alpha')
    else:
        comps = [data]
        labels = ['PRIMARY']

    # look for AVM coordinate metadata
    try:
        from pyavm import AVM
        avm = AVM(str(file_name))  # avoid unicode
        wcs = avm.to_wcs()
    except Exception:
        pass
    else:
        result.coords = coordinates_from_wcs(wcs)

    for c, l in zip(comps, labels):
        result.add_component(c, l)
    return result
def casalike_cube(filename, **kwargs):
    """
    This provides special support for 4D CASA FITS-like cubes,
    which have 2 spatial axes, a spectral axis, and a stokes axis,
    in that order.

    Each stokes cube is split out as a separate component.
    """
    from astropy.io import fits

    result = Data()
    if 'ignore_missing_end' not in kwargs:
        kwargs['ignore_missing_end'] = True
    with fits.open(filename, **kwargs) as hdulist:
        array = hdulist[0].data
        header = hdulist[0].header
    result.coords = coordinates_from_header(header)
    for i in range(array.shape[0]):
        units = header.get('BUNIT')
        component = Component.autotyped(array[[i]], units=units)
        result.add_component(component, label='STOKES %i' % i)
    return result
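
# Usage sketch (not from the original source): build a minimal 4D cube on
# disk and load it; the (stokes, spectral, y, x) axis order matches what the
# reader expects, and the WCS-less header is assumed to fall back to default
# coordinates in glue.
import numpy as np
from astropy.io import fits

fits.writeto('casa_like.fits', np.random.random((2, 3, 4, 4)), overwrite=True)
cube = casalike_cube('casa_like.fits')
print([str(c) for c in cube.main_components])  # STOKES 0, STOKES 1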
def test_ticks_go_back_after_changing(self):
    """ If you change to a categorical axis and then change back
        to a numeric, the axis ticks should fix themselves properly.
    """
    data = Data()
    data.add_component(Component(np.arange(100)), 'y')
    data.add_component(CategoricalComponent(['a'] * 50 + ['b'] * 50), 'xcat')
    data.add_component(Component(2 * np.arange(100)), 'xcont')

    self.add_data(data=data)
    self.client.yatt = data.find_component_id('y')
    self.client.xatt = data.find_component_id('xcat')
    self.check_ticks(self.client.axes.xaxis, False, True)
    self.check_ticks(self.client.axes.yaxis, False, False)

    self.client.xatt = data.find_component_id('xcont')
    self.check_ticks(self.client.axes.yaxis, False, False)
    self.check_ticks(self.client.axes.xaxis, False, False)
def _parse_data_dataframe(data, label):
    label = label or 'Data'
    result = Data(label=label)
    for c in data.columns:
        result.add_component(data[c], str(c))
    return [result]
def fits_reader(source, auto_merge=False, exclude_exts=None, label=None):
    """
    Read in all extensions from a FITS file.

    Parameters
    ----------
    source: str or HDUList
        The pathname to the FITS file.
        If an HDUList is passed in, simply use that.

    auto_merge: bool
        Merge extensions that have the same shape
        and only one has a defined WCS.

    exclude_exts: [hdu, ] or [index, ]
        List of HDU's to exclude from reading.
        This can be a list of HDU's or a list of HDU indexes.
    """
    from astropy.io import fits
    from astropy.table import Table

    exclude_exts = exclude_exts or []
    if isinstance(source, fits.hdu.hdulist.HDUList):
        hdulist = source
        close_hdulist = False
    else:
        hdulist = fits.open(source, ignore_missing_end=True)
        hdulist.verify('fix')
        close_hdulist = True

    groups = OrderedDict()
    extension_by_shape = OrderedDict()

    if label is not None:
        label_base = label
    else:
        hdulist_name = hdulist.filename()
        if hdulist_name is None:
            hdulist_name = "HDUList"
        label_base = basename(hdulist_name).rpartition('.')[0]
        if not label_base:
            label_base = basename(hdulist_name)

    # Create a new image Data.
    def new_data(suffix=True):
        if suffix:
            label = '{0}[{1}]'.format(label_base, hdu_name)
        else:
            label = label_base
        data = Data(label=label)
        data.coords = coords
        # We need to be careful here because some header values are special
        # objects that we should convert to strings
        for key, value in hdu.header.items():
            if key == 'COMMENT' or key == 'HISTORY':
                if key not in data.meta:
                    data.meta[key] = [str(value)]
                else:
                    data.meta[key].append(str(value))
            elif isinstance(value, string_types) or isinstance(value, (int, float, bool)):
                data.meta[key] = value
            else:
                data.meta[key] = str(value)
        groups[hdu_name] = data
        extension_by_shape[shape] = hdu_name
        return data

    for extnum, hdu in enumerate(hdulist):
        hdu_name = hdu.name if hdu.name else "HDU{0}".format(extnum)
        if (hdu.data is not None and
                hdu.data.size > 0 and
                hdu_name not in exclude_exts and
                extnum not in exclude_exts):
            if is_image_hdu(hdu):
                shape = hdu.data.shape
                coords = coordinates_from_header(hdu.header)
                units = hdu.header.get('BUNIT')
                if not auto_merge or has_wcs(coords):
                    data = new_data(suffix=len(hdulist) > 1)
                else:
                    try:
                        data = groups[extension_by_shape[shape]]
                    except KeyError:
                        data = new_data(suffix=len(hdulist) > 1)
                component = Component.autotyped(hdu.data, units=units)
                data.add_component(component=component, label=hdu_name)
            elif is_table_hdu(hdu):
                # Loop through columns and make component list
                table = Table.read(hdu, format='fits')
                label = '{0}[{1}]'.format(label_base, hdu_name)
                data = Data(label=label)
                groups[hdu_name] = data
                for column_name in table.columns:
                    column = table[column_name]
                    if column.ndim != 1:
                        warnings.warn("Dropping column '{0}' since it is not 1-dimensional".format(column_name))
                        continue
                    component = Component.autotyped(column, units=column.unit)
                    data.add_component(component=component, label=column_name)

    if close_hdulist:
        hdulist.close()

    return [groups[idx] for idx in groups]
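
# Usage sketch (not from the original source): read an in-memory HDUList;
# passing an HDUList avoids touching the filesystem, and the 'demo' label
# overrides the default "HDUList"-derived name.
import numpy as np
from astropy.io import fits

hdul = fits.HDUList([fits.PrimaryHDU(np.arange(16.).reshape(4, 4))])
datasets = fits_reader(hdul, label='demo')
print([d.label for d in datasets])  # ['demo'] (single HDU, so no suffix)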
def fits_reader(source, auto_merge=False, exclude_exts=None, label=None):
    """
    Read in all extensions from a FITS file.

    Parameters
    ----------
    source: str or HDUList
        The pathname to the FITS file.
        If an HDUList is passed in, simply use that.

    auto_merge: bool
        Merge extensions that have the same shape
        and only one has a defined WCS.

    exclude_exts: [hdu, ] or [index, ]
        List of HDU's to exclude from reading.
        This can be a list of HDU's or a list of HDU indexes.
    """
    from astropy.io import fits
    from astropy.table import Table

    exclude_exts = exclude_exts or []
    if not isinstance(source, fits.hdu.hdulist.HDUList):
        hdulist = fits.open(source, ignore_missing_end=True)
        hdulist.verify('fix')
    else:
        hdulist = source

    groups = OrderedDict()
    extension_by_shape = OrderedDict()

    if label is not None:
        label_base = label
    else:
        hdulist_name = hdulist.filename()
        if hdulist_name is None:
            hdulist_name = "HDUList"
        label_base = basename(hdulist_name).rpartition('.')[0]
        if not label_base:
            label_base = basename(hdulist_name)

    # Create a new image Data.
    def new_data():
        label = '{0}[{1}]'.format(label_base, hdu_name)
        data = Data(label=label)
        data.coords = coords
        groups[hdu_name] = data
        extension_by_shape[shape] = hdu_name
        return data

    for extnum, hdu in enumerate(hdulist):
        hdu_name = hdu.name if hdu.name else "HDU{0}".format(extnum)
        if (hdu.data is not None and
                hdu.data.size > 0 and
                hdu_name not in exclude_exts and
                extnum not in exclude_exts):
            if is_image_hdu(hdu):
                shape = hdu.data.shape
                coords = coordinates_from_header(hdu.header)
                if not auto_merge or has_wcs(coords):
                    data = new_data()
                else:
                    try:
                        data = groups[extension_by_shape[shape]]
                    except KeyError:
                        data = new_data()
                data.add_component(component=hdu.data, label=hdu_name)
            elif is_table_hdu(hdu):
                # Loop through columns and make component list
                table = Table.read(hdu, format='fits')
                label = '{0}[{1}]'.format(label_base, hdu_name)
                data = Data(label=label)
                groups[hdu_name] = data
                for column_name in table.columns:
                    column = table[column_name]
                    if column.ndim != 1:
                        warnings.warn("Dropping column '{0}' since it is not 1-dimensional".format(column_name))
                        continue
                    component = Component.autotyped(column, units=column.unit)
                    data.add_component(component=component, label=column_name)

    return [groups[idx] for idx in groups]
class TestCategoricalHistogram(TestHistogramClient):

    def setup_method(self, method):
        self.data = Data(y=[-1, -1, -1, -2, -2, -2, -3, -5, -7])
        self.data.add_component(
            CategoricalComponent(['a', 'a', 'a', 'b', 'c', 'd', 'd', 'e', 'f']), 'x')
        self.subset = self.data.new_subset()
        self.collect = DataCollection(self.data)
        self.client = HistogramClient(self.collect, FIGURE)
        self.axes = self.client.axes
        FIGURE.canvas.draw = MagicMock()
        assert FIGURE.canvas.draw.call_count == 0

    def test_xlimit_single_set(self):
        self.client.add_layer(self.data)
        self.client.set_component(self.data.id['x'])
        self.client.xlimits = (None, 5)
        assert self.client.xlimits == (-0.5, 5)
        self.client.xlimits = (3, None)
        assert self.client.xlimits == (3, 5)

    def test_default_xlimits(self):
        self.client.add_layer(self.data)
        self.client.set_component(self.data.id['x'])
        assert self.client.xlimits == (-0.5, 5.5)
        self.client.set_component(self.data.id['y'])
        assert self.client.xlimits == (-7, -1)

    def test_change_default_bins(self):
        self.client.add_layer(self.data)
        self.client.set_component(self.data.id['x'])
        assert self.client.nbins == 6

    def test_tick_labels(self):
        self.client.add_layer(self.data)
        self.client.set_component(self.data.id['x'])
        correct_labels = ['a', 'b', 'c', 'd', 'e', 'f']
        formatter = self.client.axes.xaxis.get_major_formatter()
        xlabels = [formatter.format_data(pos) for pos in range(6)]
        assert correct_labels == xlabels

    def test_apply_roi(self):
        self.client.add_layer(self.data)
        self.client.set_component(self.data.id['x'])
        # bins are 1...4
        self.data.edit_subset = [self.data.subsets[0]]
        roi = MagicMock()
        roi.to_polygon.return_value = [1.2, 2, 4], [2, 3, 4]
        self.client.apply_roi(roi)
        state = self.data.subsets[0].subset_state
        assert isinstance(state, CategoricalROISubsetState)
        np.testing.assert_equal(self.data.subsets[0].subset_state.roi.categories,
                                np.array(['b', 'c', 'd', 'e']))

    # REMOVED TESTS
    def test_xlog_axes_labels(self):
        """ log-scale doesn't make sense for categorical data"""
        pass

    def test_xlog_snaps_limits(self):
        """ log-scale doesn't make sense for categorical data"""
        pass

    def test_apply_roi_xlog(self):
        """ log-scale doesn't make sense for categorical data"""
        pass

    def test_nbin_override_persists_over_attribute_change(self):
        # regression test for #398
        self.collect.append(self.data)
        self.client.add_layer(self.data)
        self.client.set_component(self.data.id['x'])
        self.client.nbins = 7
        self.client.set_component(self.data.id['y'])
        assert self.client.nbins == 7
def test_cube():
    data = Data(label="Test Cube")
    comp_a = Component(np.ones((16, 16, 16)))
    data.add_component(comp_a, 'test_3')
    return data